Repository: snipsco/tract Branch: main Commit: d5e7f043c6d3 Files: 1571 Total size: 10.3 MB Directory structure: gitextract_37zz2va9/ ├── .all_crates.sh ├── .change_crate_dep.sh ├── .clang-format ├── .github/ │ ├── dependabot.yml │ └── workflows/ │ ├── asan.yml │ ├── binaries.yml │ ├── cost_model.yml │ ├── crates.yml │ ├── cross-platform.yml │ ├── examples.yml │ ├── full.yml │ ├── large_models.yml │ ├── pydoc.yml │ ├── release.yml │ ├── tract-ci-bench.yml │ ├── wheels.yml │ └── windows.yml ├── .gitignore ├── .travis/ │ ├── README.md │ ├── android-ndk.sh │ ├── asan.sh │ ├── bundle-entrypoint.sh │ ├── cache_file.sh │ ├── cargo-deny-check.sh │ ├── ci-system-setup.sh │ ├── cli-tests.sh │ ├── cost_model_task_build.sh │ ├── cross.sh │ ├── debug-tests.sh │ ├── docker-debian-stretch/ │ │ ├── Dockerfile │ │ └── sources.list │ ├── examples.sh │ ├── llm-expectations-541 │ ├── make_bundle.sh │ ├── minion.sh │ ├── minionrc │ ├── native.sh │ ├── onnx-tests.sh │ ├── regular-tests.sh │ ├── run-bundle.sh │ ├── run_all.sh │ ├── setup-sccache.sh │ ├── test-harness.sh │ ├── test-llm.sh │ ├── test-published-crates.sh │ ├── test-rt.sh │ ├── tf.sh │ ├── tflite/ │ │ ├── Dockerfile.tensorflow-aarch64 │ │ ├── Dockerfile.tensorflow-official-rpi │ │ ├── Dockerfile.tensorflow-rpitools │ │ ├── build_tflite_aarch64.sh │ │ ├── build_tflite_raspbian.sh │ │ ├── convert_all.sh │ │ ├── linux_makefile.inc │ │ └── run_all.sh │ ├── tflite.sh │ └── travis.sh ├── .travis.yml ├── .vim/ │ └── coc-settings.json ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── api/ │ ├── .gitignore │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── c/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── grace_hopper_3_224_224.f32.raw │ │ └── mobilenet.c │ ├── ffi/ │ │ ├── Cargo.toml │ │ ├── cbindgen.toml │ │ └── src/ │ │ └── lib.rs │ ├── generate-tract-h.sh │ ├── proxy/ │ │ ├── Cargo.toml │ │ ├── LICENSE │ │ ├── LICENSE-APACHE │ │ ├── LICENSE-MIT │ │ ├── ci.sh │ 
│ ├── src/ │ │ │ └── lib.rs │ │ ├── sys/ │ │ │ ├── Cargo.toml │ │ │ ├── build.rs │ │ │ ├── src/ │ │ │ │ └── lib.rs │ │ │ └── tract.h │ │ └── tests/ │ │ └── mobilenet.rs │ ├── py/ │ │ ├── .gitignore │ │ ├── MANIFEST.in │ │ ├── _static/ │ │ │ ├── redirect-index.html │ │ │ └── version-switcher.js │ │ ├── conf.py │ │ ├── docs/ │ │ │ ├── fact.md │ │ │ ├── index.md │ │ │ ├── inference_model.md │ │ │ ├── model.md │ │ │ ├── nnef.md │ │ │ ├── onnx.md │ │ │ ├── runnable.md │ │ │ └── tensor.md │ │ ├── grace_hopper_1x3x224x244.npy │ │ ├── pyproject.toml │ │ ├── requirements-docs.txt │ │ ├── requirements.txt │ │ ├── setup.py │ │ ├── tests/ │ │ │ └── mobilenet_onnx_test.py │ │ └── tract/ │ │ ├── __init__.py │ │ ├── bindings.py │ │ ├── dim.py │ │ ├── fact.py │ │ ├── inference_model.py │ │ ├── model.py │ │ ├── nnef.py │ │ ├── onnx.py │ │ ├── runnable.py │ │ ├── runtime.py │ │ ├── state.py │ │ ├── tensor.py │ │ └── transform.py │ ├── rs/ │ │ ├── Cargo.toml │ │ ├── LICENSE │ │ ├── LICENSE-APACHE │ │ ├── LICENSE-MIT │ │ ├── src/ │ │ │ └── lib.rs │ │ └── tests/ │ │ └── mobilenet.rs │ ├── src/ │ │ ├── lib.rs │ │ ├── macros.rs │ │ └── transform.rs │ └── tests/ │ ├── grace_hopper_3_224_224.f32.raw │ └── mobilenet/ │ └── mod.rs ├── ci/ │ └── tract-ci-minion/ │ ├── .gitignore │ ├── Cargo.toml │ ├── minion.toml.example │ └── src/ │ └── main.rs ├── cli/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ └── src/ │ ├── bench.rs │ ├── compare.rs │ ├── cost.rs │ ├── dump.rs │ ├── hwbench.rs │ ├── llm.rs │ ├── macros.rs │ ├── main.rs │ ├── memory_arena.rs │ ├── model.rs │ ├── params.rs │ ├── plan_options.rs │ ├── run.rs │ ├── runtimes.rs │ ├── tensor.rs │ └── utils.rs ├── core/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── src/ │ │ ├── axes/ │ │ │ ├── mapping.rs │ │ │ ├── mod.rs │ │ │ └── model.rs │ │ ├── broadcast.rs │ │ ├── floats.rs │ │ ├── framework.rs │ │ ├── late_bind.rs │ │ ├── lib.rs │ │ ├── macros.rs │ │ ├── model/ │ │ │ ├── fact.rs │ │ 
│ ├── graph.rs │ │ │ ├── helpers.rs │ │ │ ├── memory.rs │ │ │ ├── mod.rs │ │ │ ├── node.rs │ │ │ ├── order.rs │ │ │ ├── patch.rs │ │ │ ├── rewriter.rs │ │ │ ├── translator.rs │ │ │ └── typed.rs │ │ ├── ops/ │ │ │ ├── array/ │ │ │ │ ├── broadcast.rs │ │ │ │ ├── concat.rs │ │ │ │ ├── dyn_slice.rs │ │ │ │ ├── gather.rs │ │ │ │ ├── gather_elements.rs │ │ │ │ ├── gather_nd.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── one_hot.rs │ │ │ │ ├── pad.rs │ │ │ │ ├── range.rs │ │ │ │ ├── reshape.rs │ │ │ │ ├── scatter_elements.rs │ │ │ │ ├── scatter_nd.rs │ │ │ │ ├── slice.rs │ │ │ │ ├── strided_slice.rs │ │ │ │ ├── tile.rs │ │ │ │ ├── topk.rs │ │ │ │ └── trilu.rs │ │ │ ├── binary.rs │ │ │ ├── cast.rs │ │ │ ├── change_axes.rs │ │ │ ├── cnn/ │ │ │ │ ├── conv/ │ │ │ │ │ ├── block_quant.rs │ │ │ │ │ ├── conv.rs │ │ │ │ │ ├── depth_wise.rs │ │ │ │ │ ├── im2col.rs │ │ │ │ │ ├── lazy_im2col.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── q_sum_b.rs │ │ │ │ ├── deconv/ │ │ │ │ │ ├── deconv.rs │ │ │ │ │ ├── deconv_sum.rs │ │ │ │ │ └── mod.rs │ │ │ │ ├── maxpool.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── padding.rs │ │ │ │ ├── patch_axis.rs │ │ │ │ ├── patches.rs │ │ │ │ ├── pools.rs │ │ │ │ └── sumpool.rs │ │ │ ├── downsample/ │ │ │ │ ├── array.rs │ │ │ │ ├── conv.rs │ │ │ │ ├── mod.rs │ │ │ │ └── scan.rs │ │ │ ├── dummy.rs │ │ │ ├── einsum/ │ │ │ │ ├── as_blas.rs │ │ │ │ ├── einsum_matmul.rs │ │ │ │ ├── eval.rs │ │ │ │ ├── kernel_selection.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── prefix_matmul.rs │ │ │ │ └── proptest.rs │ │ │ ├── element_wise.rs │ │ │ ├── fft.rs │ │ │ ├── identity.rs │ │ │ ├── konst.rs │ │ │ ├── logic/ │ │ │ │ ├── comparison.rs │ │ │ │ └── ite.rs │ │ │ ├── logic.rs │ │ │ ├── macros.rs │ │ │ ├── math/ │ │ │ │ ├── complex.rs │ │ │ │ └── mod.rs │ │ │ ├── matmul/ │ │ │ │ ├── de_block_quant.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── optimized.rs │ │ │ │ ├── pack.rs │ │ │ │ └── quant.rs │ │ │ ├── memory/ │ │ │ │ ├── force_eval.rs │ │ │ │ ├── load.rs │ │ │ │ ├── mod.rs │ │ │ │ └── store.rs │ │ │ ├── mod.rs │ │ │ ├── nn/ 
│ │ │ │ ├── data_formats.rs │ │ │ │ ├── gelu_approximate.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── reduce.rs │ │ │ │ ├── rms_norm.rs │ │ │ │ ├── silu.rs │ │ │ │ └── softmax/ │ │ │ │ ├── fixedpoint.rs │ │ │ │ ├── math.rs │ │ │ │ └── mod.rs │ │ │ ├── quant.rs │ │ │ ├── scan/ │ │ │ │ ├── decluttered.rs │ │ │ │ ├── mod.rs │ │ │ │ └── optimized.rs │ │ │ ├── source.rs │ │ │ ├── submodel.rs │ │ │ └── unimpl.rs │ │ ├── optim/ │ │ │ ├── change_axes.rs │ │ │ ├── concat_then_einsum.rs │ │ │ ├── mod.rs │ │ │ ├── op_optim.rs │ │ │ ├── prop_const.rs │ │ │ ├── propagate_roi.rs │ │ │ ├── push_split_down.rs │ │ │ ├── slice.rs │ │ │ └── uniform_mask.rs │ │ ├── plan.rs │ │ ├── runtime.rs │ │ ├── transform.rs │ │ └── value.rs │ └── test_data/ │ └── test_data.cfg ├── cuda/ │ ├── Cargo.toml │ ├── benches/ │ │ └── cuda_flash.rs │ └── src/ │ ├── context.rs │ ├── kernels/ │ │ ├── array/ │ │ │ ├── cast.rs │ │ │ ├── copy.rs │ │ │ ├── dispatch.rs │ │ │ ├── mod.rs │ │ │ └── rotate_half.rs │ │ ├── binary.rs │ │ ├── conv.rs │ │ ├── conv_cudnn.rs │ │ ├── cu/ │ │ │ ├── array.cu │ │ │ ├── binary.cu │ │ │ ├── cnn.cu │ │ │ ├── common.cuh │ │ │ ├── element_wise.cu │ │ │ ├── flash_attn.cu │ │ │ ├── ggml_flash_attn.cu │ │ │ ├── mm_mv.cu │ │ │ ├── mm_mv_q.cu │ │ │ ├── nn.cu │ │ │ └── quantize.cu │ │ ├── element_wise.rs │ │ ├── flash_attn.rs │ │ ├── ggml_flash_attn.rs │ │ ├── iff.rs │ │ ├── launch_args.rs │ │ ├── matmul/ │ │ │ ├── mod.rs │ │ │ └── quant_act_q81.rs │ │ ├── mod.rs │ │ ├── nn/ │ │ │ ├── apply_rope.rs │ │ │ ├── gelu_approximate.rs │ │ │ ├── leaky_relu.rs │ │ │ ├── mod.rs │ │ │ ├── reduce.rs │ │ │ ├── rms_norm.rs │ │ │ ├── scaled_masked_softmax.rs │ │ │ └── softmax.rs │ │ └── utils.rs │ ├── lib.rs │ ├── ops/ │ │ ├── conv.rs │ │ ├── flash_attn.rs │ │ ├── fused_axis_op.rs │ │ ├── gemm.rs │ │ ├── ggml_flash_attn.rs │ │ ├── iff.rs │ │ ├── mod.rs │ │ └── quant_q81.rs │ ├── rewrite_rules/ │ │ ├── add_matmul_broadcast.rs │ │ ├── fuse_axis_op.rs │ │ ├── mod.rs │ │ ├── pad_q40_weights.rs │ │ └── 
untranspose_matmul_output.rs │ ├── tensor.rs │ ├── transform.rs │ └── utils.rs ├── data/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── benches/ │ │ ├── stack_tensors.rs │ │ └── tensor_from_datum.rs │ └── src/ │ ├── blob.rs │ ├── datum.rs │ ├── dim/ │ │ ├── assertion.rs │ │ ├── mod.rs │ │ ├── parse.rs │ │ ├── resolve.rs │ │ ├── sym.rs │ │ └── tree.rs │ ├── exotic.rs │ ├── lib.rs │ ├── macros.rs │ ├── scatter.rs │ ├── tensor/ │ │ ├── litteral.rs │ │ ├── plain_view.rs │ │ ├── storage.rs │ │ └── view.rs │ └── tensor.rs ├── deny.toml ├── doc/ │ ├── README.md │ ├── cli-recipe.md │ ├── graph.md │ ├── intro.md │ ├── kernel-notes.md │ ├── nnef/ │ │ ├── tract-core.nnef │ │ ├── tract-onnx.nnef │ │ ├── tract-pulse.nnef │ │ └── tract-resource.nnef │ └── op.md ├── examples/ │ ├── .gitignore │ ├── causal_llm/ │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ ├── scripts/ │ │ │ └── generate_ci_llm_assets.sh │ │ └── src/ │ │ ├── bin/ │ │ │ ├── client.rs │ │ │ ├── common/ │ │ │ │ └── mod.rs │ │ │ ├── complete.rs │ │ │ └── serve.rs │ │ └── lib.rs │ ├── face_detection_yolov8onnx_example/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ └── src/ │ │ └── main.rs │ ├── face_similarity_arcface_onnx/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ └── src/ │ │ ├── arc_face.rs │ │ ├── main.rs │ │ └── yolo_face.rs │ ├── keras-tract-tf2/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh.nope │ │ ├── example.py │ │ ├── io.npz │ │ ├── requirements.txt │ │ └── src/ │ │ └── main.rs │ ├── nemo-nemotron-asr/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── ci.sh │ │ ├── nemotron.py │ │ └── src/ │ │ └── main.rs │ ├── nemo-parakeet-asr/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── ci.sh │ │ ├── parakeet.py │ │ └── src/ │ │ └── main.rs │ ├── nnef-dump-mobilenet-v2/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ └── src/ │ │ └── main.rs │ ├── nnef-mobilenet-v2/ │ │ ├── 
.gitignore │ │ ├── Cargo.toml │ │ ├── ci.sh │ │ ├── imagenet_slim_labels.txt │ │ └── src/ │ │ └── main.rs │ ├── nnef-mobilenet-v2-api/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── ci.sh │ │ ├── imagenet_slim_labels.txt │ │ └── src/ │ │ └── main.rs │ ├── onnx-mobilenet-v2/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ ├── imagenet_slim_labels.txt │ │ └── src/ │ │ ├── bin/ │ │ │ └── dyn-shape.rs │ │ └── main.rs │ ├── pytorch-albert-v2/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ ├── export.py │ │ └── src/ │ │ └── main.rs │ ├── pytorch-resnet/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ ├── export.py │ │ ├── requirements.txt │ │ └── src/ │ │ └── main.rs │ ├── stable-diffusion/ │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci-gpu.sh │ │ ├── export.py │ │ ├── reference.py │ │ └── src/ │ │ └── main.rs │ ├── stable-diffusion-3/ │ │ ├── Cargo.toml │ │ ├── export.py │ │ ├── reference.py │ │ ├── runme.sh │ │ └── src/ │ │ └── main.rs │ ├── stable-diffusion-xl/ │ │ ├── Cargo.toml │ │ ├── ci-gpu.sh │ │ ├── export.py │ │ ├── reference.py │ │ └── src/ │ │ └── main.rs │ ├── tensorflow-mobilenet-v2/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ ├── imagenet_slim_labels.txt │ │ └── src/ │ │ └── main.rs │ └── tflite-mobilenet-v3/ │ ├── .gitignore │ ├── Cargo.toml │ ├── README.md │ ├── ci.sh │ ├── imagenet_slim_labels.txt │ └── src/ │ └── main.rs ├── extra/ │ ├── Cargo.toml │ └── src/ │ ├── exp_unit_norm.rs │ └── lib.rs ├── gpu/ │ ├── Cargo.toml │ └── src/ │ ├── device.rs │ ├── fact.rs │ ├── lib.rs │ ├── memory/ │ │ ├── mod.rs │ │ ├── pool.rs │ │ └── schema.rs │ ├── ops/ │ │ ├── RECIPE.md │ │ ├── apply_rope.rs │ │ ├── binary.rs │ │ ├── broadcast.rs │ │ ├── cast.rs │ │ ├── change_axes.rs │ │ ├── concat.rs │ │ ├── copy_based.rs │ │ ├── dyn_kv_cache.rs │ │ ├── element_wise.rs │ │ ├── gelu_approximate.rs │ │ ├── iff.rs │ │ ├── leaky_relu.rs │ │ ├── mod.rs │ │ ├── pulse.rs │ 
│ ├── reduce.rs │ │ ├── rms_norm.rs │ │ ├── rotate_half.rs │ │ ├── scaled_masked_softmax.rs │ │ ├── slice.rs │ │ └── softmax.rs │ ├── rewrite_rules/ │ │ ├── mod.rs │ │ ├── rewire_sdpa.rs │ │ ├── rewire_syncs.rs │ │ └── rms_norm.rs │ ├── session_handler.rs │ ├── sync.rs │ ├── tensor/ │ │ ├── arena_view.rs │ │ ├── mod.rs │ │ └── owned.rs │ └── utils.rs ├── harness/ │ ├── core-proptest-pulse/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── conv_plus_conv.rs │ │ ├── deconv.rs │ │ ├── delay_plus_downsample.rs │ │ ├── delay_plus_pool.rs │ │ ├── einsum.rs │ │ ├── lib.rs │ │ └── pad_plus_conv.rs │ ├── nemotron-speech-streaming-en-0.6b/ │ │ └── ci.sh │ ├── nnef-inceptionv3/ │ │ ├── Cargo.toml │ │ ├── download.sh │ │ └── src/ │ │ └── lib.rs │ ├── nnef-test-cases/ │ │ ├── .gitignore │ │ ├── conv-bias/ │ │ │ ├── expected │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── conv-q40/ │ │ │ ├── conv2d/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_base_kernel1/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_base_kernel3/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_base_kernel9/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_dilation2/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_dilation4/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_dilation8/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_groups2/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_groups4/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_insize128/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_insize64/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_stride2/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ └── 
conv_stride3/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── conv-with-batch/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── debox/ │ │ │ ├── debox_base/ │ │ │ │ ├── graph.nnef │ │ │ │ ├── io.npz │ │ │ │ └── runme.sh │ │ │ └── debox_high_dim/ │ │ │ ├── graph.nnef │ │ │ ├── io.npz │ │ │ └── runme.sh │ │ ├── dyn_slice/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── fixed_roll/ │ │ │ ├── graph.nnef │ │ │ ├── io.npz │ │ │ └── runme.sh │ │ ├── memory-arena/ │ │ │ ├── expected.json │ │ │ └── runme.sh │ │ ├── pool-padding/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── q40_linear_followed_slice/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── qmul/ │ │ │ ├── graph.nnef │ │ │ ├── graph.quant │ │ │ ├── io.npz │ │ │ └── runme.sh │ │ ├── range-slice-dyn-tile/ │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── reshape/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── reshape_with_bc/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── sdpa/ │ │ │ ├── simple-causal-f32/ │ │ │ │ ├── graph.nnef │ │ │ │ ├── io.npz │ │ │ │ └── runme.sh │ │ │ ├── simple-f16/ │ │ │ │ ├── graph.nnef │ │ │ │ ├── io.npz │ │ │ │ └── runme.sh │ │ │ ├── simple-grouped-query-att-f32/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── simple-mask-f32/ │ │ │ │ ├── graph.nnef │ │ │ │ ├── io.npz │ │ │ │ └── runme.sh │ │ │ ├── simple-non-causal-f32/ │ │ │ │ ├── graph.nnef │ │ │ │ ├── io.npz │ │ │ │ └── runme.sh │ │ │ └── simple-scale-f32/ │ │ │ ├── graph.nnef │ │ │ ├── io.npz │ │ │ └── runme.sh │ │ ├── slice-over-slice-optim-loop/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── softmax/ │ │ │ ├── softmax-change-axis/ │ │ │ │ ├── expected │ │ │ │ ├── graph.nnef │ │ │ │ └── runme.sh │ │ │ ├── softmax-change-axis-1/ │ │ │ │ ├── expected │ │ │ │ ├── graph.nnef │ │ │ │ └── runme.sh │ │ │ └── softmax-quant/ │ │ │ ├── expected/ │ │ │ │ ├── graph.nnef │ │ │ │ └── graph.quant │ │ │ ├── model/ │ │ │ │ ├── graph.nnef │ │ │ │ └── graph.quant │ │ │ └── runme.sh │ │ ├── submodel/ 
│ │ │ ├── expected │ │ │ ├── graph.nnef │ │ │ ├── graph.quant │ │ │ ├── nnet2/ │ │ │ │ ├── graph.nnef │ │ │ │ └── graph.quant │ │ │ └── runme.sh │ │ ├── tdim-cmp/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── test_all_reduce/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── test_any_reduce/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── test_manage_gru_states/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── test_stft_smaller_win/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── test_upcast_f32_attn/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── tile-with-tdim/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── uniform-mul/ │ │ │ ├── expected │ │ │ ├── graph.nnef │ │ │ ├── io.npz │ │ │ └── runme.sh │ │ └── variable-in-fragment/ │ │ ├── graph.nnef │ │ └── runme.sh │ ├── parakeet-tdt-600m-v3/ │ │ └── ci.sh │ ├── pre-optimized-graphes/ │ │ ├── .gitignore │ │ ├── hey_snips_v4_model17/ │ │ │ ├── expected │ │ │ └── runme.sh │ │ └── mdl-en-2019-Q3-librispeech/ │ │ ├── expected │ │ └── runme.sh │ ├── tf-inceptionv3/ │ │ ├── Cargo.toml │ │ ├── benches/ │ │ │ └── inceptionv3.rs │ │ ├── download.sh │ │ └── src/ │ │ └── lib.rs │ ├── tf-mobilenet-v2/ │ │ ├── Cargo.toml │ │ ├── download.sh │ │ └── src/ │ │ └── lib.rs │ └── tfl-mobilenet-v2-q/ │ ├── Cargo.toml │ ├── download.sh │ └── src/ │ └── lib.rs ├── hir/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ └── src/ │ ├── framework.rs │ ├── infer/ │ │ ├── analyser.rs │ │ ├── fact.rs │ │ ├── factoid.rs │ │ ├── helpers.rs │ │ ├── mod.rs │ │ ├── model.rs │ │ ├── ops.rs │ │ ├── optim.rs │ │ └── rules/ │ │ ├── cache.rs │ │ ├── expr.rs │ │ ├── mod.rs │ │ ├── path.rs │ │ ├── proxies.rs │ │ └── solver.rs │ ├── lib.rs │ ├── macros.rs │ └── ops/ │ ├── activations.rs │ ├── array/ │ │ ├── add_dims.rs │ │ ├── array_feature_extractor.rs │ │ ├── broadcast.rs │ │ ├── concat.rs │ │ ├── constant_like.rs │ │ ├── 
constant_of_shape.rs │ │ ├── crop.rs │ │ ├── dyn_slice.rs │ │ ├── flatten.rs │ │ ├── gather.rs │ │ ├── gather_elements.rs │ │ ├── gather_nd.rs │ │ ├── mod.rs │ │ ├── pad.rs │ │ ├── permute_axes.rs │ │ ├── range.rs │ │ ├── reshape.rs │ │ ├── rm_dims.rs │ │ ├── scatter_elements.rs │ │ ├── scatter_nd.rs │ │ ├── shape.rs │ │ ├── size.rs │ │ ├── slice.rs │ │ ├── split.rs │ │ ├── squeeze.rs │ │ ├── strided_slice.rs │ │ └── tile.rs │ ├── binary.rs │ ├── cast.rs │ ├── cnn/ │ │ ├── conv.rs │ │ ├── mod.rs │ │ └── pools.rs │ ├── downsample.rs │ ├── dummy.rs │ ├── element_wise.rs │ ├── expandable.rs │ ├── identity.rs │ ├── konst.rs │ ├── logic.rs │ ├── matmul.rs │ ├── mod.rs │ ├── nn/ │ │ ├── global_pools.rs │ │ ├── layer_max.rs │ │ ├── mod.rs │ │ ├── reduce.rs │ │ └── softmax.rs │ ├── quant.rs │ ├── scan.rs │ ├── source.rs │ └── unimpl.rs ├── libcli/ │ ├── Cargo.toml │ ├── src/ │ │ ├── annotations.rs │ │ ├── display_params.rs │ │ ├── draw.rs │ │ ├── export.rs │ │ ├── lib.rs │ │ ├── model.rs │ │ ├── profile.rs │ │ ├── tensor.rs │ │ ├── terminal.rs │ │ └── time.rs │ └── validate_wires.py ├── linalg/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── README.md │ ├── arm32/ │ │ ├── armv7neon/ │ │ │ ├── armv7neon_mmm_f32_32x1_core.tmpl │ │ │ ├── armv7neon_mmm_f32_8x1_core.tmpl │ │ │ ├── armv7neon_mmm_f32_8x4_core.tmpl │ │ │ ├── armv7neon_mmm_f32_8x6_core.tmpl │ │ │ ├── armv7neon_mmm_f32_per_cols.tmpliq │ │ │ ├── armv7neon_mmm_f32_per_rows.tmpliq │ │ │ ├── armv7neon_mmm_f32_scalars.tmpliq │ │ │ ├── armv7neon_mmm_i32_32x1.tmpl │ │ │ ├── armv7neon_mmm_i32_8x4.tmpl │ │ │ ├── armv7neon_mmm_i32_per_cols.tmpliq │ │ │ ├── armv7neon_mmm_i32_per_rows.tmpliq │ │ │ ├── armv7neon_mmm_i32_scalars.tmpliq │ │ │ ├── armv7neon_mmm_i32_scale_q8_q15.tmpliq │ │ │ ├── armv7neon_mmm_q_per_col.tmpliq │ │ │ ├── armv7neon_mmm_q_per_row.tmpliq │ │ │ ├── armv7neon_mmm_q_scalar.tmpliq │ │ │ ├── armv7neon_prefetch.tmpl │ │ │ ├── armv7neon_sigmoid_f32_4n.tmpl │ │ │ ├── 
armv7neon_tanh_f32_4n.tmpl │ │ │ └── dispatcher.tmpliq │ │ └── armvfpv2/ │ │ ├── armvfpv2_mmm_f32_4x4.tmpl │ │ └── dispatcher.tmpliq │ ├── arm64/ │ │ ├── apple_amx/ │ │ │ ├── apple_amx_mmm_f16_64x1.tmpl │ │ │ ├── apple_amx_mmm_f16_64x32.tmpl │ │ │ ├── apple_amx_mmm_f32_32x1.tmpl │ │ │ ├── apple_amx_mmm_f32_32x32.tmpl │ │ │ ├── dispatcher.tmpliq │ │ │ └── instructions.rs │ │ ├── arm64fp16/ │ │ │ ├── arm64fp16_leaky_relu_f16_8n.tmpl │ │ │ ├── arm64fp16_mmm_8h_per_col.tmpliq │ │ │ ├── arm64fp16_mmm_8h_per_row.tmpliq │ │ │ ├── arm64fp16_mmm_8h_scalar.tmpliq │ │ │ ├── arm64fp16_mmm_f16_128x1/ │ │ │ │ ├── loop1/ │ │ │ │ │ ├── cortex_a53.tmpli │ │ │ │ │ └── naive.tmpli │ │ │ │ └── loop2/ │ │ │ │ └── cortex_a55.tmpli │ │ │ ├── arm64fp16_mmm_f16_128x1_core.tmpl │ │ │ ├── arm64fp16_mmm_f16_16x8/ │ │ │ │ ├── loop1/ │ │ │ │ │ └── naive.tmpli │ │ │ │ └── loop2/ │ │ │ │ └── cortex_a55.tmpli │ │ │ ├── arm64fp16_mmm_f16_16x8_core.tmpl │ │ │ ├── arm64fp16_mmm_f16_32x4/ │ │ │ │ ├── loop1/ │ │ │ │ │ └── naive.tmpli │ │ │ │ └── loop2/ │ │ │ │ └── cortex_a55.tmpli │ │ │ ├── arm64fp16_mmm_f16_32x4_core.tmpl │ │ │ ├── arm64fp16_mmm_f16_32x6.core.tmpl │ │ │ ├── arm64fp16_mmm_f16_64x1.core.tmpl │ │ │ ├── arm64fp16_mmm_f16_64x3.core.tmpl │ │ │ ├── arm64fp16_mmm_f16_per_cols.tmpliq │ │ │ ├── arm64fp16_mmm_f16_per_rows.tmpliq │ │ │ ├── arm64fp16_mmm_f16_scalars.tmpliq │ │ │ ├── arm64fp16_mmm_load_tile.tmpliq │ │ │ ├── arm64fp16_sigmoid_f16_8n.tmpl │ │ │ ├── arm64fp16_tanh_f16_8n.tmpl │ │ │ ├── dispatcher.tmpliq │ │ │ ├── dummy_fmla_no_pragma.S │ │ │ └── dummy_fmla_pragma.S │ │ └── arm64simd/ │ │ ├── arm64simd_mmm_4s_per_col.tmpliq │ │ ├── arm64simd_mmm_4s_per_row.tmpliq │ │ ├── arm64simd_mmm_4s_scalar.tmpliq │ │ ├── arm64simd_mmm_f32_12x8/ │ │ │ ├── packed_packed_loop1/ │ │ │ │ ├── ldr_w_no_preload.tmpli │ │ │ │ ├── ldr_w_preload.tmpli │ │ │ │ ├── ldr_x_preload.tmpli │ │ │ │ └── naive.tmpli │ │ │ └── packed_packed_loop2/ │ │ │ └── cortex_a55.tmpli │ │ ├── arm64simd_mmm_f32_12x8_core.tmpl │ │ 
├── arm64simd_mmm_f32_16x4/ │ │ │ ├── packed_packed_loop1/ │ │ │ │ ├── cortex_a53.tmpli │ │ │ │ └── naive.tmpli │ │ │ └── packed_packed_loop2/ │ │ │ └── cortex_a55.tmpli │ │ ├── arm64simd_mmm_f32_16x4_core.tmpl │ │ ├── arm64simd_mmm_f32_24x4/ │ │ │ ├── loop2/ │ │ │ │ └── cortex_a55.tmpli │ │ │ └── packed_packed_loop1/ │ │ │ ├── cortex_a53.tmpli │ │ │ ├── cortex_a55.tmpli │ │ │ └── naive.tmpli │ │ ├── arm64simd_mmm_f32_24x4_core.tmpl │ │ ├── arm64simd_mmm_f32_32x1_core.tmpl │ │ ├── arm64simd_mmm_f32_32x3_core.tmpl │ │ ├── arm64simd_mmm_f32_64x1/ │ │ │ ├── loop1/ │ │ │ │ ├── cortex_a53.tmpli │ │ │ │ └── naive.tmpli │ │ │ └── loop2/ │ │ │ ├── cortex_a55.tmpli │ │ │ └── naive.tmpli │ │ ├── arm64simd_mmm_f32_64x1_core.tmpl │ │ ├── arm64simd_mmm_f32_8x8/ │ │ │ ├── packed_packed_loop1/ │ │ │ │ ├── broken_chains.tmpli │ │ │ │ ├── ldr_w_no_preload.tmpli │ │ │ │ ├── ldr_w_preload.tmpli │ │ │ │ ├── ldr_x_no_preload.tmpli │ │ │ │ ├── ldr_x_preload.tmpli │ │ │ │ └── naive.tmpli │ │ │ └── packed_packed_loop2/ │ │ │ ├── broken_chains.tmpli │ │ │ └── cortex_a55.tmpli │ │ ├── arm64simd_mmm_f32_8x8_core.tmpl │ │ ├── arm64simd_mmm_f32_per_cols.tmpliq │ │ ├── arm64simd_mmm_f32_per_rows.tmpliq │ │ ├── arm64simd_mmm_f32_scalars.tmpliq │ │ ├── arm64simd_mmm_i32_64x1.tmpl │ │ ├── arm64simd_mmm_i32_8x8.tmpl │ │ ├── arm64simd_mmm_i32_per_cols.tmpliq │ │ ├── arm64simd_mmm_i32_per_rows.tmpliq │ │ ├── arm64simd_mmm_i32_scalars.tmpliq │ │ ├── arm64simd_mmm_i32_scale_q16_q31.tmpliq │ │ ├── arm64simd_mmm_load_tile.tmpliq │ │ ├── arm64simd_sigmoid_f32_4n.tmpl │ │ ├── arm64simd_tanh_f32_4n.tmpl │ │ └── dispatcher.tmpliq │ ├── benches/ │ │ ├── arm32neon.rs │ │ ├── arm64.rs │ │ ├── arm64simd.rs │ │ ├── intel.rs │ │ ├── leaky_relu.rs │ │ ├── mat_vec.rs │ │ ├── mm_for_asr_am.rs │ │ ├── mm_for_inception.rs │ │ ├── mm_for_wavenet_hw.rs │ │ ├── sigmoid.rs │ │ ├── softmax.rs │ │ ├── utils.rs │ │ ├── virtual_im2col.rs │ │ └── x86_64.rs │ ├── build.rs │ ├── cost_model/ │ │ ├── Cargo.toml │ │ ├── src/ │ │ │ 
└── main.rs │ │ └── train/ │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── runme.sh │ │ └── train.py │ ├── matmul-bench/ │ │ ├── Cargo.toml │ │ ├── benches/ │ │ │ └── matmul.rs │ │ ├── build.rs │ │ ├── c/ │ │ │ ├── packed_tile_4x4.c │ │ │ ├── packed_tile_8x8.c │ │ │ ├── tile_1x1.c │ │ │ ├── tile_2x2.c │ │ │ ├── tile_4x4.c │ │ │ └── tile_8x8.c │ │ └── src/ │ │ └── lib.rs │ ├── src/ │ │ ├── arm32/ │ │ │ ├── armv7neon.rs │ │ │ ├── armvfpv2.rs │ │ │ ├── cortex_a7.rs │ │ │ ├── cortex_a7.txt │ │ │ ├── cortex_a9.rs │ │ │ └── cortex_a9.txt │ │ ├── arm32.rs │ │ ├── arm64/ │ │ │ ├── apple_amx.rs │ │ │ ├── arm64fp16/ │ │ │ │ ├── by_scalar.rs │ │ │ │ ├── leaky_relu.rs │ │ │ │ ├── max.rs │ │ │ │ ├── panel_extract.rs │ │ │ │ ├── sum.rs │ │ │ │ └── unicast.rs │ │ │ ├── arm64fp16.rs │ │ │ ├── arm64simd/ │ │ │ │ ├── by_scalar.rs │ │ │ │ ├── leaky_relu.rs │ │ │ │ ├── max.rs │ │ │ │ ├── panel_extract.rs │ │ │ │ ├── softmax.rs │ │ │ │ ├── sum.rs │ │ │ │ └── unicast.rs │ │ │ ├── arm64simd.rs │ │ │ ├── cortex_a53.rs │ │ │ ├── cortex_a55.rs │ │ │ ├── cortex_a72.rs │ │ │ └── cortex_a73.rs │ │ ├── arm64.rs │ │ ├── frame/ │ │ │ ├── block_quant/ │ │ │ │ ├── helpers.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── q4_0.rs │ │ │ │ ├── q8_1.rs │ │ │ │ ├── storage.rs │ │ │ │ └── value.rs │ │ │ ├── by_scalar.rs │ │ │ ├── element_wise.rs │ │ │ ├── element_wise_helper.rs │ │ │ ├── leaky_relu.rs │ │ │ ├── lut.rs │ │ │ ├── mmm/ │ │ │ │ ├── cost_model.rs │ │ │ │ ├── fuse.rs │ │ │ │ ├── input_store.rs │ │ │ │ ├── kernel.rs │ │ │ │ ├── macros.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── panel_extract.rs │ │ │ │ ├── scratch.rs │ │ │ │ ├── storage.rs │ │ │ │ └── tests/ │ │ │ │ ├── frame.rs │ │ │ │ ├── fuse.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── packed_packed.rs │ │ │ │ ├── q_scale.rs │ │ │ │ └── store.rs │ │ │ ├── mod.rs │ │ │ ├── pack.rs │ │ │ ├── reduce/ │ │ │ │ ├── max.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── softmax.rs │ │ │ │ └── sum.rs │ │ │ ├── sigmoid.rs │ │ │ ├── tanh.rs │ │ │ ├── unicast.rs │ │ │ └── weights.rs │ │ ├── generic/ │ 
│ │ ├── by_scalar.rs │ │ │ ├── erf.rs │ │ │ ├── leaky_relu.rs │ │ │ ├── lut.rs │ │ │ ├── mmm.rs │ │ │ ├── reduce.rs │ │ │ ├── rounding.rs │ │ │ ├── sigmoid.rs │ │ │ ├── tanh.rs │ │ │ └── unicast.rs │ │ ├── generic.rs │ │ ├── hwbench/ │ │ │ ├── bandwidth.rs │ │ │ ├── mod.rs │ │ │ └── runner.rs │ │ ├── lib.rs │ │ ├── multithread.rs │ │ ├── wasm.rs │ │ ├── x86_64_fma/ │ │ │ ├── by_scalar.rs │ │ │ ├── intel.rs │ │ │ ├── max.rs │ │ │ ├── mmm.rs │ │ │ ├── panel_extract.rs │ │ │ └── softmax.rs │ │ └── x86_64_fma.rs │ ├── tests/ │ │ └── virtual_im2col.rs │ └── x86_64/ │ ├── avx512/ │ │ ├── 10x1/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 1x1/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512.tmpli │ │ │ ├── unroll-16.tmpli │ │ │ ├── unroll-4.tmpli │ │ │ ├── unroll-8.tmpli │ │ │ └── unroll.tmpli │ │ ├── 1x12/ │ │ │ └── packed_packed_loop1/ │ │ │ └── avx-512.tmpli │ │ ├── 2x5/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 2x6/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 3x4/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 4x3/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 5x2/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 6x1/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 6x2/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 7x1/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 8x1/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 8x2/ │ │ │ └── packed_packed_loop1/ │ │ │ └── avx-512.tmpli │ │ ├── 8x8/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 
avx512_mmm_f32_128x1.tmpl │ │ ├── avx512_mmm_f32_16x1.tmpl │ │ ├── avx512_mmm_f32_16x12.tmpl │ │ ├── avx512_mmm_f32_16x8.tmpl │ │ ├── avx512_mmm_f32_32x5.tmpl │ │ ├── avx512_mmm_f32_32x6.tmpl │ │ ├── avx512_mmm_f32_48x4.tmpl │ │ ├── avx512_mmm_f32_64x3.tmpl │ │ ├── avx512_mmm_f32_80x2.tmpl │ │ ├── avx512_mmm_load_tile.tmpliq │ │ ├── dispatcher.tmpliq │ │ ├── f32_per_cols.tmpliq │ │ ├── f32_per_rows.tmpliq │ │ ├── f32_scalars.tmpliq │ │ ├── i32_per_cols.tmpliq │ │ ├── i32_per_rows.tmpliq │ │ ├── i32_scalars.tmpliq │ │ ├── postamble.tmpliq │ │ ├── preamble.tmpliq │ │ ├── sigmoid_f32.tmpl │ │ ├── tanh_f32.tmpl │ │ ├── zmm_per_col.tmpliq │ │ ├── zmm_per_row.tmpliq │ │ └── zmm_scalar.tmpliq │ └── fma/ │ ├── 10x1/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 2x5/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 2x6/ │ │ └── packed_packed_loop1/ │ │ ├── original-unroll.tmpli │ │ └── original.tmpli │ ├── 3x4/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 4x3/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 5x2/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 6x1/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 6x2/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 7x1/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 8x1/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 8x8/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── avx2_mmm_i32_8x8.tmpl │ ├── dispatcher.tmpliq │ ├── fma_mmm_f32_16x5.tmpl │ ├── fma_mmm_f32_16x6.tmpl │ ├── fma_mmm_f32_24x4.tmpl │ ├── fma_mmm_f32_32x1.tmpl │ ├── fma_mmm_f32_32x3.tmpl │ ├── fma_mmm_f32_40x2.tmpl │ ├── fma_mmm_f32_64x1.tmpl │ ├── fma_mmm_f32_8x8.tmpl │ ├── fma_mmm_f32_per_cols.tmpliq │ ├── fma_mmm_f32_per_rows.tmpliq │ ├── 
fma_mmm_f32_scalars.tmpliq │ ├── fma_mmm_i32_per_cols.tmpliq │ ├── fma_mmm_i32_per_rows.tmpliq │ ├── fma_mmm_i32_scalars.tmpliq │ ├── fma_mmm_load_tile.tmpliq │ ├── fma_mmm_ymm_per_col.tmpliq │ ├── fma_mmm_ymm_per_row.tmpliq │ ├── fma_mmm_ymm_scalar.tmpliq │ ├── fma_sigmoid_f32.tmpl │ ├── fma_tanh_f32.tmpl │ ├── postamble.tmpliq │ └── preamble.tmpliq ├── metal/ │ ├── Cargo.toml │ ├── README.md │ ├── benches/ │ │ └── metal_gemm.rs │ └── src/ │ ├── command_buffer.rs │ ├── context.rs │ ├── encoder.rs │ ├── func_constants.rs │ ├── kernels/ │ │ ├── array/ │ │ │ ├── array_ops.metal │ │ │ ├── cast.rs │ │ │ ├── copy.rs │ │ │ ├── dispatch.rs │ │ │ ├── mod.rs │ │ │ └── rotate_half.rs │ │ ├── bin_ops.metal │ │ ├── bin_ops.rs │ │ ├── conv.metal │ │ ├── conv.rs │ │ ├── element_wise.metal │ │ ├── element_wise.rs │ │ ├── matmul/ │ │ │ ├── basic/ │ │ │ │ ├── basic_mat_mul.metal │ │ │ │ └── mod.rs │ │ │ ├── ggml_gemm/ │ │ │ │ ├── README.md │ │ │ │ ├── ggml_mm_mv.metal │ │ │ │ └── mod.rs │ │ │ ├── mfa/ │ │ │ │ ├── libMetalFlashAttention-ios.metallib │ │ │ │ ├── libMetalFlashAttention-macos.metallib │ │ │ │ └── mod.rs │ │ │ ├── mlx_gemm/ │ │ │ │ ├── mlx_gemm.metal │ │ │ │ ├── mlx_gemv.metal │ │ │ │ └── mod.rs │ │ │ └── mod.rs │ │ ├── mod.rs │ │ ├── nn/ │ │ │ ├── apply_rope.rs │ │ │ ├── gelu_approximate.rs │ │ │ ├── leaky_relu.rs │ │ │ ├── mod.rs │ │ │ ├── nn_ops.metal │ │ │ ├── reduce.rs │ │ │ ├── rms_norm.rs │ │ │ ├── scaled_masked_softmax.rs │ │ │ ├── silu.rs │ │ │ └── softmax.rs │ │ └── utils.rs │ ├── lib.rs │ ├── ops/ │ │ ├── conv.rs │ │ ├── fused_axis_op.rs │ │ ├── gemm.rs │ │ └── mod.rs │ ├── rewrite_rules/ │ │ ├── add_matmul_broadcast.rs │ │ ├── fuse_axis_op.rs │ │ ├── mod.rs │ │ └── untranspose_matmul_output.rs │ ├── tensor.rs │ ├── tests.rs │ ├── transform.rs │ └── utils.rs ├── nnef/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── cli/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── nnef-resources/ │ │ ├── Cargo.toml │ │ ├── src/ │ │ │ ├── 
json_loader.rs │ │ │ └── lib.rs │ │ └── tests/ │ │ ├── nnef_with_json/ │ │ │ ├── graph.nnef │ │ │ └── src_config.json │ │ └── test_json_resource.rs │ ├── src/ │ │ ├── ast/ │ │ │ ├── dump.rs │ │ │ ├── dump_doc.rs │ │ │ ├── parse.rs │ │ │ └── quant.rs │ │ ├── ast.rs │ │ ├── deser.rs │ │ ├── framework.rs │ │ ├── lib.rs │ │ ├── liquid.rs │ │ ├── ops/ │ │ │ ├── core/ │ │ │ │ ├── broadcast.rs │ │ │ │ ├── cast.rs │ │ │ │ ├── complex.rs │ │ │ │ ├── downsample.rs │ │ │ │ ├── dyn_slice.rs │ │ │ │ ├── einsum.rs │ │ │ │ ├── fft.rs │ │ │ │ ├── gather.rs │ │ │ │ ├── gelu_approximate.rs │ │ │ │ ├── is_inf.rs │ │ │ │ ├── matmul.rs │ │ │ │ ├── one_hot.rs │ │ │ │ ├── qconv.rs │ │ │ │ ├── qmatmul.rs │ │ │ │ ├── range.rs │ │ │ │ ├── reduce.rs │ │ │ │ ├── rms_norm.rs │ │ │ │ ├── scan.rs │ │ │ │ ├── scatter.rs │ │ │ │ ├── shape_of.rs │ │ │ │ ├── silu.rs │ │ │ │ ├── softmax.rs │ │ │ │ ├── source.rs │ │ │ │ ├── submodel.rs │ │ │ │ ├── topk.rs │ │ │ │ └── trilu.rs │ │ │ ├── core.rs │ │ │ ├── mod.rs │ │ │ ├── nnef/ │ │ │ │ ├── deser.rs │ │ │ │ ├── mod.rs │ │ │ │ └── ser.rs │ │ │ └── resource.rs │ │ ├── registry.rs │ │ ├── resource.rs │ │ ├── ser.rs │ │ ├── tensors.rs │ │ └── transform.rs │ ├── stdlib.nnef │ └── tests/ │ ├── alexnet.nnef │ └── parse.rs ├── onnx/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── benches/ │ │ ├── linear_classifier.rs │ │ └── linear_regressor.rs │ ├── build-proto.rs │ ├── protos/ │ │ └── onnx/ │ │ ├── onnx-operators.proto3 │ │ ├── onnx.proto │ │ └── onnx.proto3 │ ├── src/ │ │ ├── data_resolver.rs │ │ ├── lib.rs │ │ ├── model.rs │ │ ├── ops/ │ │ │ ├── array/ │ │ │ │ ├── compress.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── nonzero.rs │ │ │ │ ├── one_hot.rs │ │ │ │ ├── pad.rs │ │ │ │ ├── shape.rs │ │ │ │ ├── slice.rs │ │ │ │ ├── split.rs │ │ │ │ ├── squeeze.rs │ │ │ │ ├── topk.rs │ │ │ │ ├── trilu.rs │ │ │ │ └── unsqueeze.rs │ │ │ ├── cast.rs │ │ │ ├── cumsum.rs │ │ │ ├── d2s.rs │ │ │ ├── einsum.rs │ │ │ ├── fft.rs │ │ │ ├── grid_sample.rs │ │ │ 
├── logic.rs │ │ │ ├── math/ │ │ │ │ ├── clip.rs │ │ │ │ ├── gemm.rs │ │ │ │ ├── mat_mul_integer.rs │ │ │ │ ├── pow.rs │ │ │ │ └── rem.rs │ │ │ ├── math.rs │ │ │ ├── ml/ │ │ │ │ ├── category_mapper.rs │ │ │ │ ├── linear_classifier.rs │ │ │ │ ├── linear_regressor.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── normalizer.rs │ │ │ │ └── tree_ensemble_classifier.rs │ │ │ ├── mod.rs │ │ │ ├── multinomial.rs │ │ │ ├── nn/ │ │ │ │ ├── batch_norm.rs │ │ │ │ ├── conv_transpose.rs │ │ │ │ ├── dropout.rs │ │ │ │ ├── instance_norm.rs │ │ │ │ ├── layer_norm.rs │ │ │ │ ├── lrn.rs │ │ │ │ ├── mod.rs │ │ │ │ └── reduce.rs │ │ │ ├── non_max_suppression.rs │ │ │ ├── quant.rs │ │ │ ├── random.rs │ │ │ ├── rec/ │ │ │ │ ├── common.rs │ │ │ │ ├── gru.rs │ │ │ │ ├── lstm.rs │ │ │ │ ├── rnn.rs │ │ │ │ └── scan.rs │ │ │ ├── rec.rs │ │ │ ├── resize.rs │ │ │ └── s2d.rs │ │ ├── pb_helpers.rs │ │ ├── prost/ │ │ │ └── onnx.rs │ │ └── tensor.rs │ └── test_cases/ │ ├── byte_sb_bidi_lstm/ │ │ ├── README.md │ │ ├── generate_io.py │ │ ├── io.npz │ │ └── model.onnx │ ├── deconv_group/ │ │ ├── io.npz │ │ ├── model.onnx │ │ └── vars.sh │ ├── lgbm_classifier_tensor/ │ │ ├── generate_io.py │ │ ├── io.npz │ │ ├── model.onnx │ │ └── vars.sh │ ├── lgbm_regressor_tensor/ │ │ ├── generate_io.py │ │ ├── io.npz │ │ ├── model.onnx │ │ └── vars.sh │ ├── linear_classifier/ │ │ └── model.onnx │ ├── linear_regressor/ │ │ └── model.onnx │ ├── qlstm_3-2-3_T3_S1/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qrelu_1/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qrelu_2/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qsigmoid_1/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qsigmoid_2/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qtanh_1/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── 
model.raw │ │ └── vars.sh │ ├── qtanh_2/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qtdnn_10x5_101_i32_biases/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── run_all.sh │ ├── tinyyolov2/ │ │ ├── io.npz │ │ └── vars.sh │ ├── transformer-mlm/ │ │ ├── generate_io.py │ │ ├── io.npz │ │ └── vars.sh │ ├── xgboost_classifier_tree/ │ │ ├── generate_io.py │ │ ├── io.npz │ │ ├── model.onnx │ │ └── vars.sh │ └── xgboost_regressor_tree/ │ ├── generate_io.py │ ├── io.npz │ ├── model.onnx │ └── vars.sh ├── onnx-opl/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ └── src/ │ ├── grid_sample.rs │ ├── lib.rs │ ├── lrn.rs │ ├── ml/ │ │ ├── category_mapper.rs │ │ ├── mod.rs │ │ ├── tree.rs │ │ └── tree_ensemble_classifier.rs │ ├── multinomial.rs │ ├── non_max_suppression.rs │ ├── random.rs │ └── resize.rs ├── post-release.sh ├── pulse/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ └── src/ │ ├── fact.rs │ ├── lib.rs │ ├── macros.rs │ ├── model.rs │ └── ops/ │ ├── array/ │ │ ├── broadcast.rs │ │ ├── concat.rs │ │ ├── mask.rs │ │ ├── mod.rs │ │ ├── pad.rs │ │ └── slice.rs │ ├── cnn/ │ │ ├── conv.rs │ │ ├── deconv.rs │ │ ├── mod.rs │ │ └── pools.rs │ ├── delay.rs │ ├── downsample.rs │ ├── dummy.rs │ ├── fft.rs │ ├── identity.rs │ ├── mask.rs │ ├── mod.rs │ ├── scan.rs │ ├── slice.rs │ └── source.rs ├── pulse-opl/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ └── src/ │ ├── concat.rs │ ├── deconv_delay.rs │ ├── delay.rs │ ├── lib.rs │ ├── mask.rs │ ├── pad.rs │ └── slice.rs ├── release.sh ├── rustfmt.toml ├── tensorflow/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── benches/ │ │ └── hey_snips_3.pb │ ├── build-proto.rs │ ├── examples/ │ │ └── plus3.rs │ ├── protos/ │ │ └── tensorflow/ │ │ └── core/ │ │ ├── framework/ │ │ │ ├── attr_value.proto │ │ │ ├── function.proto │ │ │ ├── graph.proto │ 
│ │ ├── node_def.proto │ │ │ ├── op_def.proto │ │ │ ├── resource_handle.proto │ │ │ ├── tensor.proto │ │ │ ├── tensor_shape.proto │ │ │ ├── types.proto │ │ │ ├── variable.proto │ │ │ └── versions.proto │ │ └── protobuf/ │ │ ├── meta_graph.proto │ │ ├── saved_model.proto │ │ ├── saved_object_graph.proto │ │ ├── saver.proto │ │ ├── struct.proto │ │ └── trackable_object_graph.proto │ ├── src/ │ │ ├── conform/ │ │ │ ├── mod.rs │ │ │ └── tf.rs │ │ ├── lib.rs │ │ ├── model.rs │ │ ├── ops/ │ │ │ ├── array/ │ │ │ │ ├── concatv2.rs │ │ │ │ ├── expand_dims.rs │ │ │ │ ├── fill.rs │ │ │ │ ├── gather_nd.rs │ │ │ │ ├── gather_v2.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── pack.rs │ │ │ │ ├── pad.rs │ │ │ │ ├── squeeze.rs │ │ │ │ └── transpose.rs │ │ │ ├── control_flow.rs │ │ │ ├── logic.rs │ │ │ ├── math/ │ │ │ │ └── reduce.rs │ │ │ ├── math.rs │ │ │ ├── mod.rs │ │ │ ├── nn/ │ │ │ │ ├── conv2d.rs │ │ │ │ ├── dw_conv2d.rs │ │ │ │ ├── fused_batch_norm.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── pools.rs │ │ │ │ └── s2b/ │ │ │ │ ├── mod.rs │ │ │ │ ├── raw.rs │ │ │ │ └── unary.rs │ │ │ ├── quant.rs │ │ │ ├── random/ │ │ │ │ ├── mod.rs │ │ │ │ ├── philox.rs │ │ │ │ └── random_uniform.rs │ │ │ └── rec/ │ │ │ ├── block_lstm.rs │ │ │ └── mod.rs │ │ ├── prost/ │ │ │ ├── google.protobuf.rs │ │ │ └── tensorflow.rs │ │ ├── tensor.rs │ │ └── tfpb.rs │ └── tests/ │ ├── models/ │ │ └── plus3.pb │ ├── ops_array_pack.rs │ ├── ops_array_strided_slice.proptest-regressions │ ├── ops_array_strided_slice.rs │ ├── ops_fake_quant_with_min_max_vars.rs │ ├── ops_nn_conv2d.proptest-regressions │ ├── ops_nn_conv2d.rs │ ├── ops_nn_dwconv2d.proptest-regressions │ ├── ops_nn_dwconv2d.rs │ ├── ops_nn_pools.proptest-regressions │ ├── ops_nn_pools.rs │ ├── ops_nn_space_to_batch.proptest-regressions │ ├── ops_nn_space_to_batch.rs │ ├── ops_random_uniform.rs │ └── utils/ │ └── mod.rs ├── test-rt/ │ ├── infra/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── suite-onnx/ │ │ ├── Cargo.toml │ │ ├── node.txt │ │ ├── 
pytorch-converted.txt │ │ ├── pytorch-operator.txt │ │ ├── simple.txt │ │ └── src/ │ │ └── lib.rs │ ├── suite-unit/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── apply_rope.rs │ │ ├── bin_einsum.rs │ │ ├── binary.rs │ │ ├── conv_f16.rs │ │ ├── conv_f32.rs │ │ ├── conv_q.rs │ │ ├── deconv.rs │ │ ├── downsample.rs │ │ ├── elmwise.rs │ │ ├── gelu_approximate.rs │ │ ├── lib.rs │ │ ├── matmul_q40.rs │ │ ├── q_binary.rs │ │ ├── q_elmwise.rs │ │ ├── q_flavours.rs │ │ ├── q_helpers.rs │ │ ├── rms_norm.rs │ │ ├── scaled_masked_softmax.rs │ │ ├── sdpa.rs │ │ ├── silu.rs │ │ └── slice.rs │ ├── test-blas/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── src/ │ │ │ └── lib.rs │ │ └── suite.rs │ ├── test-cuda/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── src/ │ │ │ └── lib.rs │ │ └── suite.rs │ ├── test-f16/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── src/ │ │ │ └── lib.rs │ │ └── suite.rs │ ├── test-metal/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── ggml_suite.rs │ │ ├── src/ │ │ │ └── lib.rs │ │ └── suite.rs │ ├── test-nnef-cycle/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── src/ │ │ │ └── lib.rs │ │ └── suite.rs │ ├── test-onnx-core/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── debug-utils/ │ │ │ ├── Cargo.toml │ │ │ ├── README.md │ │ │ ├── save_all.py │ │ │ └── src/ │ │ │ └── main.rs │ │ ├── include-passing-ignored.sh │ │ └── src/ │ │ ├── bin/ │ │ │ └── reset-test-list.rs │ │ └── lib.rs │ ├── test-tflite/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── src/ │ │ │ ├── lib.rs │ │ │ └── tflite_runtime.rs │ │ └── suite.rs │ └── test-unit-core/ │ ├── Cargo.toml │ ├── build.rs │ └── src/ │ ├── lib.rs │ └── main.rs ├── test-suite.sh ├── tflite/ │ ├── Cargo.toml │ ├── Readme.md │ ├── schema/ │ │ └── tflite.fbs │ └── src/ │ ├── lib.rs │ ├── model.rs │ ├── ops/ │ │ ├── array.rs │ │ ├── cnn.rs │ │ ├── element_wise.rs │ │ ├── math.rs │ │ ├── mod.rs │ │ └── nn.rs │ ├── registry.rs │ ├── rewriter.rs │ ├── ser.rs │ ├── tensors.rs │ └── tflite_generated.rs ├── transformers/ │ ├── Cargo.toml │ └── src/ │ ├── 
lib.rs │ ├── ops/ │ │ ├── apply_rope.rs │ │ ├── dyn_kv_cache.rs │ │ ├── flash_sdpa.rs │ │ ├── mod.rs │ │ ├── scaled_masked_softmax.rs │ │ ├── sdpa.rs │ │ └── streamed_sdpa.rs │ └── rewriter.rs └── yank.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .all_crates.sh ================================================ ALL_CRATES_PATH="data linalg core nnef nnef/nnef-resources pulse-opl pulse extra transformers hir tflite tensorflow onnx-opl onnx gpu metal cuda libcli api api/rs api/ffi api/proxy/sys api/proxy cli" ================================================ FILE: .change_crate_dep.sh ================================================ #!/bin/bash crate=$1 version=$2 perl -pi -e "s/^($crate = {.*version *= *)\"([^\"]*)\"(.*)$/\$1\"=$version\"\$3/" \ `find . -name Cargo.toml \! -path "./target/*" \! -path "./issue*"` ================================================ FILE: .clang-format ================================================ BasedOnStyle: LLVM IndentWidth: 4 TabWidth: 4 UseTab: Never IndentPPDirectives: BeforeHash PPIndentWidth: 4 ColumnLimit: 100 # OneLineFormatOffRegex: '^\s*#\s*pragma\s+unroll\b' ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" day: "monday" groups: actions: patterns: - "*" - package-ecosystem: "cargo" directory: "/" schedule: interval: "weekly" day: "monday" open-pull-requests-limit: 10 groups: rust-dependencies: patterns: - "*" - package-ecosystem: "pip" directory: "/api/py" ignore: # Only update them manually since updating them might break compatibility - dependency-name: "numpy" schedule: interval: "weekly" day: "monday" ================================================ FILE: .github/workflows/asan.yml 
================================================ name: Sanitized build tests on: workflow_dispatch: schedule: - cron: '0 5 * * MON' env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: sanitizer-address: strategy: fail-fast: false matrix: os: [ ubuntu-latest, macOS-latest ] runs-on: ${{matrix.os}} steps: - uses: actions/checkout@v6 - name: Rustup update run: rustup update - name: Run sanitized tests run: .travis/asan.sh ================================================ FILE: .github/workflows/binaries.yml ================================================ on: release: types: - created name: Upload Release Binaries env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: assets: name: Upload Release Binaries strategy: fail-fast: false matrix: os: [ ubuntu-latest, macOS-latest ] arch: [ x86_64, aarch64, armv7 ] include: - os: ubuntu-latest arch: x86_64 target: x86_64-unknown-linux-musl musl: x86_64-linux-musl - os: ubuntu-latest arch: aarch64 target: aarch64-unknown-linux-musl musl: aarch64-linux-musl - os: ubuntu-latest arch: armv7 target: armv7-unknown-linux-musleabihf musl: armv7l-linux-musleabihf - os: macOS-latest arch: x86_64 target: x86_64-apple-darwin - os: macOS-latest arch: aarch64 target: aarch64-apple-darwin exclude: - os: macOS-latest arch: armv7 runs-on: ${{ matrix.os }} steps: - name: Checkout code uses: actions/checkout@v6 - name: Extract version tag id: version run: echo value=$(echo ${{ github.ref }} | cut -f 3 -d / | sed 's/^v//' ) >> $GITHUB_OUTPUT - name: Build tract run: | set -ex target=${{matrix.target}} version=${{steps.version.outputs.value}} name=${target}-${version} rustup update rustup target add ${target} if [ -n "${{matrix.musl}}" ] then MUSL_TRIPLE=${{matrix.musl}} curl -s https://s3.amazonaws.com/tract-ci-builds/toolchains/${MUSL_TRIPLE}-cross.tgz | tar zx MUSL_BIN=`pwd`/${MUSL_TRIPLE}-cross/bin export PATH=$MUSL_BIN:$PATH export TARGET_CC=$MUSL_BIN/${MUSL_TRIPLE}-gcc RUST_TRIPLE_ENV=$(echo 
${target} | tr 'a-z-' 'A-Z_') export CARGO_TARGET_${RUST_TRIPLE_ENV}_CC=$TARGET_CC export CARGO_TARGET_${RUST_TRIPLE_ENV}_LINKER=$TARGET_CC fi cargo build --target ${target} --release -p tract-cli mkdir tract-$name cp target/${target}/release/tract tract-${name} tar czf tract-${name}.tgz tract-${name} - name: Upload asset uses: softprops/action-gh-release@v2 with: files: tract-${{matrix.target}}-${{ steps.version.outputs.value }}.tgz name: ${{ steps.version.outputs.value }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/cost_model.yml ================================================ on: workflow_dispatch: inputs: dataset_id: description: 'dataset identifier' required: true name: Generate cost model analysis dataset env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: build: name: Upload cost model tasks runs-on: ubuntu-latest strategy: fail-fast: false matrix: target: [ "aarch64", "armv7" ] steps: - name: Checkout code uses: actions/checkout@v6 - name: Build and upload run: ./.travis/cost_model_task_build.sh ${{matrix.target}} ${{github.event.inputs.dataset_id}} env: AWS_ACCESS_KEY_ID: ${{secrets.TRACT_CI_AWS_ACCESS_KEY_ID}} AWS_SECRET_ACCESS_KEY: ${{secrets.TRACT_CI_AWS_SECRET_ACCESS_KEY}} AWS_EC2_METADATA_DISABLED: true ================================================ FILE: .github/workflows/crates.yml ================================================ name: Rust crates on: pull_request: schedule: - cron: '0 3 * * *' workflow_dispatch: env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: prepare-matrix: runs-on: ubuntu-latest outputs: os: ${{steps.set-matrix.outputs.os}} rust: ${{steps.set-matrix.outputs.rust}} steps: - id: set-matrix env: FULL: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }} run: | if [ "$FULL" == "true" ] then echo 'os=["ubuntu-latest", "macos-latest"]' >> $GITHUB_OUTPUT echo 'rust=["1.91.0", 
"stable", "beta", "nightly"]' >> $GITHUB_OUTPUT else echo ::notice::Skipping macOS checks on PR and commit. Dispatch workflow manually if needed. echo 'os=["ubuntu-latest"]' >> $GITHUB_OUTPUT echo 'rust=["1.91.0"]' >> $GITHUB_OUTPUT fi crates: name: ${{matrix.os}} / ${{matrix.crate}} / ${{matrix.rust}} needs: prepare-matrix strategy: matrix: os: ${{fromJson(needs.prepare-matrix.outputs.os)}} rust: ${{fromJson(needs.prepare-matrix.outputs.rust)}} crate: [ tract-data, tract-linalg, tract-core, tract-nnef, tract-hir, tract-onnx, tract-pulse, tract-onnx-opl, tract-pulse-opl, tract, test-unit-core, test-onnx-core, test-nnef-cycle, test-f16, ] fail-fast: false runs-on: ${{matrix.os}} env: RUSTUP_TOOLCHAIN: ${{matrix.rust}} steps: - uses: actions/checkout@v6 - name: Cargo test run: cargo test -p ${{matrix.crate}} cuda: runs-on: cuda-lovelace needs: prepare-matrix strategy: matrix: rust: ${{fromJson(needs.prepare-matrix.outputs.rust)}} fail-fast: false env: RUSTUP_TOOLCHAIN: ${{matrix.rust}} steps: - uses: actions/checkout@v6 - name: Cargo test run: cargo test -p tract-cuda -p test-cuda metal: runs-on: macOS needs: prepare-matrix strategy: matrix: rust: ${{fromJson(needs.prepare-matrix.outputs.rust)}} fail-fast: false env: RUSTUP_TOOLCHAIN: ${{matrix.rust}} steps: - uses: actions/checkout@v6 - name: Cargo test run: cargo test -p tract-metal -p test-metal pedantic: name: fmt, clippy, etc (${{matrix.os}} / ${{matrix.rust}}) needs: prepare-matrix strategy: matrix: os: ${{fromJson(needs.prepare-matrix.outputs.os)}} rust: ${{fromJson(needs.prepare-matrix.outputs.rust)}} fail-fast: false runs-on: ${{matrix.os}} env: RUSTUP_TOOLCHAIN: ${{matrix.rust}} steps: - uses: actions/checkout@v6 - run: rustup component add clippy && cargo clippy - name: fmt run: rustup component add rustfmt && cargo fmt --check - name: Warnings env: RUSTFLAGS: -D warnings run: cargo check cargo-deny: strategy: fail-fast: false runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Install 
cargo-deny run: | curl -L https://github.com/EmbarkStudios/cargo-deny/releases/download/$VERSION/cargo-deny-$VERSION-x86_64-unknown-linux-musl.tar.gz \ | tar -zx --strip-components=1 "cargo-deny-$VERSION-x86_64-unknown-linux-musl/cargo-deny" env: VERSION: 0.18.9 - name: Run cargo-deny run: .travis/cargo-deny-check.sh ================================================ FILE: .github/workflows/cross-platform.yml ================================================ name: Embedded targets on: pull_request: schedule: - cron: '0 5 * * *' workflow_dispatch: env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true RUSTUP_TOOLCHAIN: 1.91.0 jobs: linux: strategy: fail-fast: false matrix: platform: - raspbian - aarch64-unknown-linux-gnu - aarch64-unknown-linux-gnu-stretch - armv6vfp-unknown-linux-gnueabihf - armv7-unknown-linux-gnueabihf - armv7-unknown-linux-gnueabihf-stretch - aarch64-unknown-linux-musl - cortexa53-unknown-linux-musl - armv7-unknown-linux-musl - aarch64-linux-android - armv7-linux-androideabi - i686-linux-android - x86_64-linux-android - x86_64-unknown-linux-gnu-stretch - wasm32-unknown-unknown - wasm32-wasi runs-on: ubuntu-latest permissions: id-token: write contents: read steps: - uses: actions/checkout@v6 - name: Get current date id: date run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - name: Configure AWS Credentials continue-on-error: true uses: aws-actions/configure-aws-credentials@v6 with: role-to-assume: arn:aws:iam::567805100031:role/github-runner-tract-ci aws-region: us-east-2 - uses: actions/cache@v5 with: path: | ~/.rustup ~/.cargo/registry ~/.cargo/git # ~/.cache/sccache .cached target key: ${{ runner.os }}-${{matrix.platform}}-${{steps.date.outputs.date}} - name: Setup wasmtime if: ${{ matrix.platform }} == "wasm32-wasi" uses: bytecodealliance/actions/wasmtime/setup@v1 - name: Cross script env: PLATFORM: ${{matrix.platform}} AWS_EC2_METADATA_DISABLED: true run: .travis/cross.sh apple: strategy: fail-fast: false matrix: platform: 
- aarch64-apple-ios - aarch64-apple-darwin runs-on: macos-latest permissions: id-token: write contents: read steps: - uses: actions/checkout@v6 - name: Configure AWS Credentials continue-on-error: true uses: aws-actions/configure-aws-credentials@v6 with: role-to-assume: arn:aws:iam::567805100031:role/github-runner-tract-ci aws-region: us-east-2 - name: Get current date id: date run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - name: Cross script env: PLATFORM: ${{matrix.platform}} run: .travis/cross.sh ================================================ FILE: .github/workflows/examples.yml ================================================ name: Examples on: schedule: - cron: '0 3 * * *' workflow_dispatch: env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true RUSTUP_TOOLCHAIN: 1.91.0 jobs: examples: runs-on: ubuntu-latest outputs: examples: ${{steps.set-matrix.outputs.examples}} steps: - uses: actions/checkout@v6 - id: set-matrix run: | echo examples=`find examples -name ci.sh | cut -d/ -f 2 | jq -Rsc '. 
/ "\n" - [""]'` >> "$GITHUB_OUTPUT" example: name: ${{ matrix.ex }} runs-on: ubuntu-latest needs: examples strategy: fail-fast: false matrix: ex: ${{fromJSON(needs.examples.outputs.examples)}} steps: - uses: actions/checkout@v6 - name: Configure AWS Credentials # if: github.repository == 'sonos/tract' continue-on-error: true uses: aws-actions/configure-aws-credentials@v6 with: role-to-assume: arn:aws:iam::567805100031:role/github-runner-tract-ci aws-region: us-east-2 - name: example tests env: AWS_EC2_METADATA_DISABLED: true timeout-minutes: 30 run: | cd examples/${{matrix.ex}} ./ci.sh build-tract-cli: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - run: cargo build -p tract-cli --profile opt-no-lto - uses: actions/upload-artifact@v7 with: name: tract-cli-x86_64 path: ./target/opt-no-lto/tract build-tract-cli-macos: runs-on: macOS steps: - uses: actions/checkout@v6 - run: cargo build -p tract-cli --profile opt-no-lto - uses: actions/upload-artifact@v7 with: name: tract-cli-aarch64-apple path: ./target/opt-no-lto/tract gpu-examples: runs-on: ubuntu-latest outputs: examples: ${{steps.set-matrix.outputs.examples}} steps: - uses: actions/checkout@v6 - id: set-matrix run: | echo examples=`find examples -name ci-gpu.sh | cut -d/ -f 2 | jq -Rsc '. 
/ "\n" - [""]'` >> "$GITHUB_OUTPUT" gpu-example: name: ${{ matrix.ex }} (CUDA) runs-on: cuda-lovelace needs: [gpu-examples, build-tract-cli] strategy: fail-fast: false matrix: ex: ${{fromJSON(needs.gpu-examples.outputs.examples)}} steps: - uses: actions/checkout@v6 - uses: actions/download-artifact@v8 with: name: tract-cli-x86_64 path: target/opt-no-lto - run: chmod +x target/opt-no-lto/tract - name: GPU example tests timeout-minutes: 60 run: | cd examples/${{matrix.ex}} ./ci-gpu.sh gpu-example-metal: name: ${{ matrix.ex }} (Metal) runs-on: macOS needs: [gpu-examples, build-tract-cli-macos] strategy: fail-fast: false matrix: ex: ${{fromJSON(needs.gpu-examples.outputs.examples)}} steps: - uses: actions/checkout@v6 - uses: actions/download-artifact@v8 with: name: tract-cli-aarch64-apple path: target/opt-no-lto - run: chmod +x target/opt-no-lto/tract - name: Metal GPU example tests timeout-minutes: 60 run: | cd examples/${{matrix.ex}} ./ci-gpu.sh ================================================ FILE: .github/workflows/full.yml ================================================ name: Full test harness on: schedule: - cron: '0 3 * * *' workflow_dispatch: inputs: pr_number: description: "Optional PR number to test (from fork ok). Leave empty to run on selected branch." 
required: false type: number env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: prepare: runs-on: ubuntu-latest outputs: test_ref: ${{ steps.set.outputs.test_ref }} steps: - id: set uses: actions/github-script@v8 with: script: | const prInput = context.payload.inputs?.pr_number; core.info(`Fetching PR ${prInput}`); if (!prInput) { // Use the ref the workflow was triggered on (branch/tag/SHA in base repo) core.setOutput('test_ref', process.env.GITHUB_SHA); return; } const pr = await github.rest.pulls.get({ owner: context.repo.owner, repo: context.repo.repo, pull_number: Number(prInput), }); core.info(pr.data.head.sha); core.setOutput('test_ref', pr.data.head.sha); old-harness: runs-on: ubuntu-latest strategy: fail-fast: false permissions: id-token: write contents: read needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Configure AWS Credentials continue-on-error: true uses: aws-actions/configure-aws-credentials@v6 with: role-to-assume: arn:aws:iam::567805100031:role/github-runner-tract-ci aws-region: us-east-2 - name: Full test run: .travis/test-harness.sh cli-tests: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Full test env: AWS_EC2_METADATA_DISABLED: true run: .travis/cli-tests.sh onnx-tests: runs-on: ubuntu-latest needs: prepare strategy: matrix: opset: [1_4_1, 1_5_0, 1_6_0, 1_7_0, 1_8_1, 1_9_0, 1_10_2, 1_11_0, 1_12_0, 1_13_0, 1_14_1, 1_15_0, 1_16_2, 1_17_0, 1_18_0, 1_19_1] steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Full test run: .travis/onnx-tests.sh ${{ matrix.opset }} tflite: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Full test run: .travis/tflite.sh some-tests-with-paranoid-asserts: runs-on: 
ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: With assertions run: | rustup update cargo test --features tract-core/paranoid_assertions -p test-onnx-core -p test-unit-core without-default-features: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Without default features run: | rustup update cargo check -p tract-cli --no-default-features $CARGO_EXTRA complexes: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: With complexes run: | rustup update cargo check -p tract-nnef --features complex $CARGO_EXTRA check-all-targets: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Check all targets run: | ROOT=$(pwd) ./.travis/ci-system-setup.sh cargo check --all-targets --workspace --exclude test-metal --exclude tract-metal C: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: C smoke tests run: | cd api/c cargo install cbindgen make python: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Setup Python uses: actions/setup-python@v6 with: python-version: "3.13" - name: Install uv uses: astral-sh/setup-uv@v7 - name: Pytest bindings timeout-minutes: 60 run: | cd api/py uv venv --python 3.13 source .venv/bin/activate uv pip install -e ".[dev]" pytest . 
================================================ FILE: .github/workflows/large_models.yml ================================================ name: Large models on: pull_request: schedule: - cron: '0 3 * * *' workflow_dispatch: env: LARGE_MODELS: true jobs: cli: name: Build tract on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: matrix: os: [ macos-latest, ubuntu-latest ] steps: - uses: actions/checkout@v6 - run: | ROOT=. ./.travis/ci-system-setup.sh cargo build -p tract-cli --profile opt-no-lto --no-default-features --features transformers - run: echo uname=$(uname) >> $GITHUB_ENV - uses: actions/upload-artifact@v7 with: name: tract-cli-${{env.uname}} path: ./target/opt-no-lto/tract foundation-llms: runs-on: ubuntu-latest outputs: models: ${{steps.set-matrix.outputs.models}} q: ${{steps.set-matrix.outputs.q}} steps: - id: set-matrix env: FULL: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }} run: | if [ "$FULL" = "true" ] then echo 'models=[ "openelm-270M", "llama-3.2-1B-instruct", "llama-3.2-3B-instruct", "llama-3.1-8B-instruct", "qwen3-1.7B", "qwen3-8B" ]' >> $GITHUB_OUTPUT echo 'q=[ "f16f16", "f32f32", "q40ef16" ]' >> $GITHUB_OUTPUT else echo ::notice::Skipping most checks on PR and commit. Dispatch workflow manually if needed. 
echo 'models=[ "llama-3.2-1B-instruct" ]' >> $GITHUB_OUTPUT echo 'q=[ "f32f32", "q40ef16" ]' >> $GITHUB_OUTPUT fi foundation-llm: name: ${{ matrix.os }} / ${{matrix.rt}} / ${{ matrix.model }} / ${{ matrix.q }} needs: [ cli, foundation-llms ] runs-on: ${{ matrix.os }} strategy: matrix: os: [ macOS, cuda-lovelace ] model: ${{fromJson(needs.foundation-llms.outputs.models)}} q: ${{fromJson(needs.foundation-llms.outputs.q)}} rt: [ cpu, gpu ] exclude: - model: openelm-270M q: f32f32 - model: Llama-3.2-3B-Instruct q: f32f32 - model: Llama-3.2-3B-Instruct q: f32f32 - model: Llama-3.1-8B-Instruct q: f32f32 - model: Qwen3-1.7B q: f32f32 - model: Qwen3-8B q: f32f32 - model: OpenELM-270M q: f32f32 fail-fast: false permissions: id-token: write contents: read steps: - uses: actions/checkout@v6 - name: Configure AWS Credentials continue-on-error: true uses: aws-actions/configure-aws-credentials@v6 with: role-to-assume: arn:aws:iam::567805100031:role/github-runner-tract-ci aws-region: us-east-2 - run: echo uname=$(uname) >> $GITHUB_ENV - uses: actions/download-artifact@v8 with: name: tract-cli-${{env.uname}} path: tract-cli-${{env.uname}} - name: Download and run run: | chmod +x tract-cli-${{env.uname}}/tract export TRACT_RUN=$GITHUB_WORKSPACE/tract-cli-${{env.uname}}/tract if [ "${{matrix.rt}}" = "gpu" ] then case $(uname) in Darwin) RT=metal;; Linux) RT=cuda;; esac fi .travis/test-llm.sh ${{matrix.model}} ${{matrix.q}} $RT parakeet-tdt-600m-v3: name: ${{matrix.os}} / Parakeet TDT 600m v3 needs: [ cli ] strategy: matrix: os: [ macOS, cuda-lovelace ] fail-fast: false permissions: id-token: write contents: read runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v6 - run: echo uname=$(uname) >> $GITHUB_ENV - uses: actions/download-artifact@v8 with: name: tract-cli-${{env.uname}} path: tract-cli-${{env.uname}} - name: Download and run run: | chmod +x tract-cli-${{env.uname}}/tract export TRACT_RUN=$GITHUB_WORKSPACE/tract-cli-${{env.uname}}/tract 
./harness/parakeet-tdt-600m-v3/ci.sh ================================================ FILE: .github/workflows/pydoc.yml ================================================ name: Python gh-pages doc on: pull_request: release: workflow_dispatch: env: CARGO_INCREMENTAL: false jobs: build_doc: name: Build doc runs-on: ubuntu-latest if: github.repository == 'sonos/tract' steps: - uses: actions/checkout@v6 - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable - name: Set up Python uses: actions/setup-python@v6 with: python-version: "3.12" - name: Extract version tag id: version if: github.event_name == 'release' && github.event.action == 'published' run: echo value=$(echo ${{ github.ref }} | cut -f 3 -d / | sed 's/^v//' ) >> $GITHUB_OUTPUT - name: Build doc run: | set -ex cd api/py python -m venv pydocs source pydocs/bin/activate pip install -r requirements-docs.txt pip install -e . sphinx-build -b html . _build/html cp _static/redirect-index.html _build/html/index.html - name: Deploy to gh-pages if: github.event_name != 'pull_request' run: | set -ex git config user.name "CI bot" git config user.email ci-bot@tract.rs version="${{ steps.version.outputs.value }}" if [ -z "$version" ]; then version="dev" fi # fetch existing gh-pages into a work directory git fetch origin gh-pages --depth=1 || true workdir=$(mktemp -d) git worktree add "$workdir" gh-pages 2>/dev/null || { git worktree add --orphan "$workdir" gh-pages } # copy new build into the versioned subdirectory rm -rf "$workdir/$version" cp -r api/py/_build/html "$workdir/$version" # regenerate versions.json (mike-compatible format) from directories present cd "$workdir" python3 -c " import json, os, re dirs = sorted( [d for d in os.listdir('.') if os.path.isdir(d) and d != '.git'], key=lambda v: [int(x) if x.isdigit() else x for x in re.split(r'(\d+)', v)], reverse=True, ) versions = [{'version': d, 'title': d, 'aliases': []} for d in dirs] with open('versions.json', 'w') as f: json.dump(versions, f, 
indent=2) " # commit and push git add -A git commit -m "Update Python docs ($version)" || true git push origin gh-pages # clean up worktree cd - git worktree remove "$workdir" ================================================ FILE: .github/workflows/release.yml ================================================ on: push: tags: - 'v*' name: Create release env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: release: name: Create release runs-on: ubuntu-latest steps: - name: Extract version tag id: version run: echo value=$(echo ${{ github.ref }} | cut -f 3 -d / | sed 's/^v//' ) >> $GITHUB_OUTPUT - uses: actions/checkout@v6 - name: Create Release uses: softprops/action-gh-release@v2 with: name: tract ${{ steps.version.outputs.value }} env: GITHUB_TOKEN: ${{ secrets.ACCESS_TOKEN_RELEASE }} ================================================ FILE: .github/workflows/tract-ci-bench.yml ================================================ name: Bench with tract-ci-minion on: schedule: - cron: '1 * * * *' # every hour at minute 1 workflow_dispatch: jobs: minion: strategy: fail-fast: false matrix: os: - macOS - cuda-lovelace runs-on: ${{ matrix.os }} steps: - name: Run minion if found run: | if [ -d $HOME/tract-minion ] then echo "Running minion" cd $HOME/tract-minion ./tract-ci-minion --once else echo "Not running minion" fi ================================================ FILE: .github/workflows/wheels.yml ================================================ name: Python wheels on: schedule: - cron: '0 3 * * MON' release: types: [created] workflow_dispatch: inputs: publish: description: force publish to pypi type: boolean pypi_version_override: description: override version id detection type: string env: CARGO_INCREMENTAL: false PYPI_VERSION_OVERRIDE: ${{ inputs.pypi_version_override }} CIBW_ENVIRONMENT_PASS_LINUX: "PYPI_VERSION_OVERRIDE" FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true MACOSX_DEPLOYMENT_TARGET: 10.13 jobs: build_wheels: name: Build wheels on ${{ 
matrix.os }} runs-on: ${{ matrix.os}} strategy: fail-fast: false matrix: os: [ubuntu-22.04, windows-2022, macos-14] steps: - uses: actions/checkout@v6 - name: Setup | Rust uses: dtolnay/rust-toolchain@stable - uses: actions/setup-python@v6 with: python-version: "3.13" - name: Install uv uses: astral-sh/setup-uv@v7 - name: Install rust toolchains if: startsWith(matrix.os, 'macOS') run: rustup target install x86_64-apple-darwin aarch64-apple-darwin - name: Build wheels uses: nick-fields/retry@v4 with: max_attempts: 1 timeout_seconds: 54000 # 15 hours :/ command: uvx cibuildwheel --output-dir wheelhouse api/py - uses: actions/upload-artifact@v7 with: name: wheels-${{github.run_id}}-${{matrix.os}} path: ./wheelhouse/*.whl make_sdist: name: Make SDist runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Install uv uses: astral-sh/setup-uv@v7 - name: Build SDist run: cd api/py && uv build --sdist - uses: actions/upload-artifact@v7 with: name: wheels-${{github.run_id}}-src path: api/py/dist/*.tar.gz upload_all: needs: [build_wheels, make_sdist] runs-on: ubuntu-latest if: (github.event_name == 'release' && github.event.action == 'published') || inputs.publish steps: - uses: actions/download-artifact@v8 with: pattern: wheels-${{github.run_id}}-* merge-multiple: true path: dist - uses: pypa/gh-action-pypi-publish@v1.13.0 with: user: __token__ password: ${{ secrets.PYPI }} verbose: true ================================================ FILE: .github/workflows/windows.yml ================================================ name: Windows unit tests on: # pull_request: workflow_dispatch: schedule: - cron: '0 3 * * *' env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: windows: strategy: matrix: os: [ windows-2022 ] toolchain: [ gnu, msvc ] fail-fast: false runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v6 - uses: nick-fields/retry@v4 name: Install Rustup using win.rustup.rs with: timeout_minutes: 10 max_attempts: 8 shell: pwsh 
command: | # Disable the download progress bar which can cause perf issues $ProgressPreference = "SilentlyContinue" Invoke-WebRequest https://win.rustup.rs/ -OutFile rustup-init.exe .\rustup-init.exe -y --default-host=x86_64-pc-windows-msvc --profile=minimal - uses: nick-fields/retry@v4 name: Install the target with: timeout_minutes: 10 max_attempts: 8 shell: pwsh command: | rustup toolchain add stable-x86_64-pc-windows-${{matrix.toolchain}} rustup default stable-x86_64-pc-windows-${{matrix.toolchain}} - name: Install LLVM and Clang uses: KyleMayes/install-llvm-action@v2 with: version: "11.0" - name: debug run: dir "C:\\Program Files\\LLVM" - name: debug lib run: dir "C:\\Program Files\\LLVM\\lib" - name: debug bin run: dir "C:\\Program Files\\LLVM\\bin" - name: top level cargo check run: cargo check --workspace --exclude test-blas --exclude tract-metal --exclude test-metal --exclude causal_llm env: LIBCLANG_PATH: "C:\\Program Files\\LLVM\\bin" - name: data / linalg / core / nnef / onnx / onnx-opl run: cargo test -p tract-data -p tract-linalg -p tract-core -p tract-nnef -p tract-onnx -p tract-onnx-opl - name: Onnx test suite run: | cargo test --release -p test-onnx-core -p test-unit-core env: TRACT_LOG: info ================================================ FILE: .gitignore ================================================ target **/*.rs.bk *.rustfmt *.back Cargo.lock examples/data .idea .cached/** flamegraph.svg perf.data* readings.* metrics tract.out .gdb_history /issue-* /.dinghy.toml .cargo proptest-regressions /tmp wheelhouse target-bisector* /nvidia ================================================ FILE: .travis/README.md ================================================ # Travis & minions test infrastructure ## Principles * travis is triggered on each commit, it will run `./.travis/native.sh` to perform x86_64 builds, plus a series of `./.travis/cross.sh` for as many arm boards configurations. 
* `.travis/cross.sh` pushes a `.tgz` to a s3 bucket for each configuration. The bundle contains a `entrypoint.sh` script and anything it depends on, including the relevant `tract` cli executable. The script is actually names `bundle-entrypoint.sh` in the repository. * devices are running `minion.sh` and will pick the new bundles from the s3 bucket, untar and run the `entrypoint.sh` ## Testing locally ``` cargo build --release -p tract-cli && cargo bench -p tract-linalg --no-run && .travis/run-bundle.sh `.travis/make_bundle.sh` ``` ## minion setup ``` MINION=user@hostname.local scp .travis/minionrc $MINION:.minionrc scp .travis/minion.sh $MINION: ``` also setup aws credentials (.aws/credentials) ``` apt install wget curl perl awscli screen vim netcat ``` On device: `.minioncrc` set a MINION_ID. At this point, running `./minion.sh` should work. ## crontab `crontab -e` ``` */10 * * * * $HOME/minion.sh ``` ## systemd timers in /etc/systemd/system/minion.service ``` [Unit] Description=Travis ci bench minion [Service] User=root Type=oneshot ExecStart=/home/root/minion.sh ``` in /etc/systemd/system/minion.timer ``` [Unit] Description=Run minion.service every 5 minutes [Timer] OnCalendar=*:0/5 [Install] WantedBy=timers.target ``` then ``` systemctl enable minion.timer systemctl start minion.timer ``` # Setup file server (http only) ``` sudo apt install nginx awscli vim ``` * setup aws credentials (.aws/credentials) * in $HOME/sync-data.sh: ``` ``` * chmod +x $HOME/sync-data.sh * run it: ./sync-data.sh * `crontab -e` ``` */5 * * * * $HOME/sync-data.sh ``` * `sudo vi /etc/nginx/sites-available/models` ``` server { root /home/raspbian/models/; location /models { } } ``` * `sudo ln -s /etc/nginx/sites-available/models /etc/nginx/sites-enabled/` * `sudo rm /etc/nginx/sites-enabled/default` * `sudo /etc/init.d/nginx reload` * test : `curl -I http://localhost/hey_snips_v1.pb` ================================================ FILE: .travis/android-ndk.sh 
================================================ #!/bin/sh set -ex which java || sudo apt install -y default-jdk ANDROID_SDK=$HOME/cached/android-sdk if [ ! -d "$ANDROID_SDK" ] then mkdir -p $ANDROID_SDK cd $ANDROID_SDK # ANDROID_SDK_VERSION=4333796 # "https://dl.google.com/android/repository/sdk-tools-linux-${ANDROID_SDK_VERSION}.zip" curl -s -o android-sdk.zip \ https://dl.google.com/android/repository/commandlinetools-linux-8092744_latest.zip unzip -q android-sdk.zip rm android-sdk.zip fi yes | $ANDROID_SDK/cmdline-tools/bin/sdkmanager --sdk_root=$ANDROID_SDK --licenses > /dev/null $ANDROID_SDK/cmdline-tools/bin/sdkmanager --sdk_root=$ANDROID_SDK \ "build-tools;30.0.0" "platform-tools" "platforms;android-31" "tools" "ndk-bundle" \ > /dev/null ================================================ FILE: .travis/asan.sh ================================================ #!/bin/sh set -ex # RUSTFLAGS=-Zsanitizer=address cargo +nightly test -Zbuild-std --target $(rustc -vV | sed -n 's|host: ||p') TARGET=$(rustc -vV | sed -n 's|host: ||p') rustup toolchain add nightly rustup component add rust-src --toolchain nightly-$TARGET export RUSTFLAGS=-Zsanitizer=address export RUSTDOCFLAGS=$RUSTFLAGS export RUSTUP_TOOLCHAIN=nightly export RUST_VERSION=nightly export CARGO_EXTRA="--target $TARGET" cargo -q test -q -p tract-linalg $CARGO_EXTRA # inventory, asan and macos liner are not playing nice, so we have to stop there if [ $(uname) == "Darwin" ] then exit 0 fi cargo -q test -q -p tract-core --features paranoid_assertions $CARGO_EXTRA ./.travis/regular-tests.sh if [ -n "$CI" ] then cargo clean fi ./.travis/onnx-tests.sh if [ -n "$CI" ] then cargo clean fi ./.travis/cli-tests.sh ================================================ FILE: .travis/bundle-entrypoint.sh ================================================ #!/bin/sh set -ex start=$(date +%s) ROOT=`pwd` if [ -n "$TRACT_RUN" ] then TRACT=$TRACT_RUN elif [ -x tract ] then TRACT="./tract" else cargo build -p tract-cli -q --release 
# Benchmark one network with tract and append measurements to the `metrics` file.
# $1: metric name for the net, $2: metric suffix (pass/pulse...), rest: tract CLI args.
net_bench() {
    net=$1
    pb=$2
    shift 2

    # Wall-clock eval time: run the bench and keep the "real" figure (truncated to 9 digits).
    $TRACT "$@" --machine-friendly -O bench --allow-random-input $BENCH_OPTS > tract.out
    v=`cat tract.out | grep -a real | cut -f 2 -d ' ' | sed 's/\([0-9]\{9,9\}\)[0-9]*/\1/'`
    echo net.$net.evaltime.$pb $v >> metrics

    # Second run with --readings enabled: tract writes per-stage samples to readings.out.
    $TRACT "$@" --readings --readings-heartbeat 1000 --machine-friendly -O bench --allow-random-input $BENCH_OPTS > tract.out
    for stage in model_ready before_optimize
    do
        # Stage names use '_' but readings.out may use '.' or '-': match any separator.
        pattern=$(echo $stage | sed 's/[_-]/./g')
        # Column 1: time to reach the stage.
        v=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 1 -d ' ')
        echo net.$net.time_to_$stage.$pb $v >> metrics
        # Column 4: resident size at the stage.
        v=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 4 -d ' ')
        echo net.$net.rsz_at_$stage.$pb $v >> metrics
        # Columns 11 and 10: freed and allocated bytes; "active" is their difference.
        f=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 11 -d ' ')
        a=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 10 -d ' ')
        # Guard against empty grep results (same guard as llm_bench): $(($a-$f)) with
        # empty operands is an arithmetic syntax error and would abort under `set -e`.
        if [ -n "$a" -a -n "$f" ]
        then
            echo net.$net.active_at_$stage.$pb $(($a-$f)) >> metrics
        fi
    done
}
--machine-friendly -O llm-bench $BENCH_OPTS > tract.out then cat tract.out echo llm.$net.pp512.$pb $(cat tract.out | grep -a PP512 | cut -f 2 -d ' ') >> metrics echo llm.$net.tg128.$pb $(cat tract.out | grep -a TG128 | cut -f 2 -d ' ') >> metrics fi if $TRACT "$@" --readings --readings-heartbeat 1000 --llm --machine-friendly -O llm-bench $BENCH_OPTS > /dev/null then for stage in model_ready before_optimize do pattern=$(echo $stage | sed 's/[_-]/./g') v=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 1 -d ' ') echo llm.$net.time_to_$stage.$pb $v >> metrics v=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 4 -d ' ') echo llm.$net.rsz_at_$stage.$pb $v >> metrics f=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 11 -d ' ') a=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 10 -d ' ') if [ -n "$a" -a -n "$f" ] then echo llm.$net.active_at_$stage.$pb $(($a-$f)) >> metrics fi done fi } net_bench arm_ml_kws_cnn_m pass $CACHEDIR/ARM-ML-KWS-CNN-M.pb -i 49,10,f32 --partial --input-node Mfcc net_bench hey_snips_v1 400ms $CACHEDIR/hey_snips_v1.pb -i 80,40,f32 net_bench hey_snips_v31 400ms $CACHEDIR/hey_snips_v3.1.pb -i 40,40,f32 net_bench hey_snips_v4_model17 2sec $CACHEDIR/hey_snips_v4_model17.pb -i 200,20,f32 net_bench hey_snips_v4_model17 pulse8 $CACHEDIR/hey_snips_v4_model17.pb -i S,20,f32 --pulse 8 net_bench hey_snips_v4_model17_nnef pulse8 --nnef-tract-pulse $CACHEDIR/hey_snips_v4_model17.alpha1.tar net_bench mobilenet_v1_1 pass $CACHEDIR/mobilenet_v1_1.0_224_frozen.pb -i 1,224,224,3,f32 net_bench mobilenet_v2_1 pass $CACHEDIR/mobilenet_v2_1.4_224_frozen.pb -i 1,224,224,3,f32 net_bench inceptionv1q pass $CACHEDIR/inceptionv1_quant.nnef.tar.gz --nnef-tract-core net_bench inceptionv3 pass $CACHEDIR/inception_v3_2016_08_28_frozen.pb -i 1,299,299,3,f32 net_bench mdl-en-2019-Q3-librispeech_onnx 2600ms $CACHEDIR/en_libri_real/model.onnx --output-node output -i 264,40 net_bench mdl-en-2019-Q3-librispeech_onnx 
pulse_240ms $CACHEDIR/en_libri_real/model.onnx --output-node output -i S,40 --pulse 24 net_bench en_tdnn_lstm_bn_q7 2600ms $CACHEDIR/en_tdnn_lstm_bn_q7/model.onnx --output-node output -i 264,40 net_bench en_tdnn_lstm_bn_q7 pulse_240ms $CACHEDIR/en_tdnn_lstm_bn_q7/model.onnx --output-node output -i S,40 --pulse 24 net_bench en_tdnn_8M 2600ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i 264,40 net_bench en_tdnn_8M pulse_240ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i S,40 --pulse 24 net_bench en_tdnn_8M pulse_180ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i S,40 --pulse 18 net_bench en_tdnn_8M pulse_120ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i S,40 --pulse 12 net_bench en_tdnn_8M_nnef pulse_240ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.alpha1.a.tar --nnef-tract-pulse net_bench en_tdnn_15M 2600ms $CACHEDIR/en_tdnn_15M.onnx --output-node output -i 264,40 net_bench en_tdnn_15M pulse_240ms $CACHEDIR/en_tdnn_15M.onnx --output-node output -i S,40 --pulse 24 net_bench en_tdnn_15M pulse_120ms $CACHEDIR/en_tdnn_15M.onnx --output-node output -i S,40 --pulse 12 net_bench en_tdnn_15M_nnef pulse_240ms $CACHEDIR/en_tdnn_15M.alpha1.tar --nnef-tract-pulse net_bench dummy-conmer-12M pulse_120ms $CACHEDIR/dummy-conmer-12M.nnef.tar --nnef-tract-core --pulse 12 net_bench en_tdnn_pyt_15M pulse_120ms $CACHEDIR/mdl-en-2023-03-27-allen-17h11m50s.nnef.tar --nnef-tract-core --pulse 12 net_bench speaker_id pulse8 $CACHEDIR/speaker-id-2019-03.onnx -i 1,S,40,f32 --output-node 257 --partial --pulse 8 net_bench voicecom_fake_quant 2sec $CACHEDIR/snips-voice-commands-cnn-fake-quant.pb -i 200,10,f32 net_bench voicecom_float 2sec $CACHEDIR/snips-voice-commands-cnn-float.pb -i 200,10,f32 net_bench trunet pulse1_f32 $CACHEDIR/trunet_dummy.nnef.tgz --nnef-tract-core --pulse 1 net_bench trunet pulse1_f16 $CACHEDIR/trunet_dummy.nnef.tgz 
--nnef-tract-core -t f32_to_f16 --pulse 1 . $PRIVATE if [ $(uname) = "Darwin" ] then LLM_BACKENDS="cpu metal" fi if which nvidia-smi then LLM_BACKENDS="cpu cuda" fi if [ -n "$LLM_BACKENDS" ] then for backend in $LLM_BACKENDS do case $backend in cpu) extra="--timeout 180";; metal) extra="--metal --timeout 60" BENCH_OPTS="--warmup-loops 1" ;; cuda) extra="--cuda --timeout 60" BENCH_OPTS="--warmup-loops 1" ;; esac llm_bench llama-3_2-1B-q40ef32-516 $backend $CACHEDIR/Llama-3.2-1B-q40ef32.516.nnef.tgz $extra llm_bench openelm-270M-q40ef16-516 $backend $CACHEDIR/OpenELM-270M-q40ef16.516.nnef.tgz $extra llm_bench llama-3_2-1B-instruct-q40ef16-541 $backend $CACHEDIR/Llama-3.2-1B-Instruct-q40ef16.541.nnef.tgz $extra llm_bench openelm-270M-q40ef16-541 $backend $CACHEDIR/OpenELM-270M-q40ef16.541.nnef.tgz $extra net_bench parakeet-tdt-600m-v3-f32f32-preprocessor_1s $backend $CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.preprocessor.nnef.tgz \ -t transformers_detect_all --nnef-tract-transformers --set B=1 --set A=16000 $extra net_bench parakeet-tdt-600m-v3-f32f32-encoder_1s $backend $CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.encoder.p1.nnef.tgz \ -t transformers_detect_all --nnef-tract-transformers --set B=1 --set S=100 $extra net_bench parakeet-tdt-600m-v3-f32f32-decoder_pass $backend $CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.decoder.nnef.tgz \ -t transformers_detect_all --nnef-tract-transformers --set B=1 --set T=1 $extra net_bench parakeet-tdt-600m-v3-f32f32-joint_pass $backend $CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.joint.nnef.tgz \ -t transformers_detect_all --nnef-tract-transformers --set B=1 --set R=1 --set U=1 $extra if [ "$backend" != "cpu" ] then llm_bench llama-3_2-3B-q40ef32-516 $backend $CACHEDIR/Llama-3.2-3B-q40ef32.516.nnef.tgz $extra llm_bench llama-3_1-8B-instruct-q40ef16-541 $backend $CACHEDIR/Llama-3.1-8B-Instruct-q40ef16.541.nnef.tgz $extra llm_bench llama-3_2-3B-instruct-q40ef16-541 $backend $CACHEDIR/Llama-3.2-3B-Instruct-q40ef16.541.nnef.tgz $extra 
llm_bench qwen3-1_7B-q40ef16-541 $backend $CACHEDIR/Qwen3-1.7B-q40ef16.541.nnef.tgz $extra fi done fi end=$(date +%s) echo bundle.bench-runtime $(($end - $start)) >> metrics ================================================ FILE: .travis/cache_file.sh ================================================ #!/bin/sh set -e if [ -z "$CACHEDIR" ] then CACHEDIR=`dirname $0`/../.cached fi mkdir -p $CACHEDIR cd $CACHEDIR for file in $@ do mkdir -p $(dirname $file) if [ ! -e $file ] then wget --no-verbose https://s3.amazonaws.com/tract-ci-builds/tests/$file -O $file.tmp \ || aws s3 cp s3://tract-ci-builds/tests/$file $file.tmp mv $file.tmp $file fi done exit 0 ================================================ FILE: .travis/cargo-deny-check.sh ================================================ #!/bin/sh if [ -e cargo-deny ] then CARGO_DENY=`pwd`/cargo-deny else CARGO_DENY="cargo deny" fi (cd api/rs ; $CARGO_DENY check) ================================================ FILE: .travis/ci-system-setup.sh ================================================ #!/bin/sh set -e [ -d $ROOT/.travis ] || exit 1 "\$ROOT not set correctly '$ROOT'" if [ -z "$RUSTUP_TOOLCHAIN" ] then export RUSTUP_TOOLCHAIN=1.91.0 fi export RUSTUP_TOOLCHAIN PATH=$PATH:$HOME/.cargo/bin if [ -n "$CI" -a ! -e /tmp/ci-setup-done ] then if [ `uname` = "Darwin" ] then sysctl -n machdep.cpu.brand_string python3 --version brew install coreutils numpy python-setuptools jshon PATH="/opt/homebrew/opt/coreutils/libexec/gnubin:$PATH" export PYTHON_BIN_PATH=python3 else if [ "$RUNNER_ENVIRONMENT" != "self-hosted" ] then if [ `whoami` != "root" ] then SUDO=sudo fi $SUDO apt-get update # $SUDO apt-get upgrade -y $SUDO apt-get install -y llvm python3 python3-numpy jshon wget curl build-essential sudo jshon clang if ! 
which aws then curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o awscliv2.zip $SUDO apt-get install -y unzip unzip -q awscliv2.zip $SUDO ./aws/install aws --version fi fi fi which rustup || curl https://sh.rustup.rs -sSf | sh -s -- -y rustup update rustup toolchain add $RUSTUP_TOOLCHAIN [ -n "$GITHUB_PATH" ] && echo $HOME/.cargo/bin >> $GITHUB_PATH touch /tmp/ci-setup-done fi S3=https://s3.amazonaws.com/tract-ci-builds/tests if [ -n "$LARGE_MODELS" ] then export CACHE_FILE=$ROOT/.travis/cache_file.sh export MODELS=$HOME/.cache/models export CACHEDIR=$MODELS mkdir -p $MODELS elif [ -n "$CI" ] then MODELS=$S3 CACHE_FILE=true else CACHE_FILE=$ROOT/.travis/cache_file.sh MODELS=${MODELS:-$ROOT/.cached} mkdir -p $MODELS fi if [ -z "$TRACT_RUN" ] then TRACT_RUN="cargo run -p tract-cli $CARGO_EXTRA --profile opt-no-lto --no-default-features --features transformers,pulse --" export TRACT_RUN fi TRACT_RUNTIMES="-O" if [ "$(uname)" = "Darwin" ] && (system_profiler SPDisplaysDataType | grep -i "Metal") then TRACT_RUNTIMES="$TRACT_RUNTIMES --metal" fi if which nvidia-smi then TRACT_RUNTIMES="$TRACT_RUNTIMES --cuda" fi echo $TRACT_RUNTIMES ================================================ FILE: .travis/cli-tests.sh ================================================ #!/bin/sh WHITE='\033[1;37m' NC='\033[0m' # No Color set -e ROOT=$(dirname $(dirname $(realpath $0))) . 
$ROOT/.travis/ci-system-setup.sh echo echo $WHITE • build tract $NC echo TRACT_RUN=$(cargo build --message-format json -p tract-cli $CARGO_EXTRA --profile opt-no-lto | jq -r 'select(.target.name == "tract" and .executable).executable') echo TRACT_RUN=$TRACT_RUN export TRACT_RUN echo echo $WHITE • harness/nnef-test-cases $NC echo for t in `find harness/nnef-test-cases -name runme.sh` do echo $WHITE$t$NC $t done echo echo $WHITE • onnx/test_cases $NC echo # ( cd onnx/test_cases ; CACHEDIR=$MODELS ./run_all.sh ) echo echo $WHITE • full models command line test cases $NC echo echo $WHITE image $NC $CACHE_FILE squeezenet.onnx $TRACT_RUN $MODELS/squeezenet.onnx -O \ run -q \ --allow-random-input \ --assert-output-fact 1,1000,1,1,f32 $CACHE_FILE inception_v3_2016_08_28_frozen.pb $TRACT_RUN \ $MODELS/inception_v3_2016_08_28_frozen.pb \ -i 1,299,299,3,f32 -O \ run -q \ --allow-random-input \ --assert-output-fact 1,1001,f32 $TRACT_RUN \ $MODELS/inception_v3_2016_08_28_frozen.pb \ -i 1,299,299,3,f32 -O \ run -q \ --allow-random-input \ --assert-output-fact 1,1001,f32 $CACHE_FILE mobilenet_v1_1.0_224_frozen.pb $TRACT_RUN $MODELS/mobilenet_v1_1.0_224_frozen.pb \ -O -i 1,224,224,3,f32 \ run -q \ --allow-random-input \ --assert-output-fact 1,1001,f32 $CACHE_FILE mobilenet_v2_1.4_224_frozen.pb $TRACT_RUN $MODELS/mobilenet_v2_1.4_224_frozen.pb \ -O -i 1,224,224,3,f32 \ run -q \ --allow-random-input \ --assert-output-fact 1,1001,f32 $CACHE_FILE inceptionv1_quant.nnef.tar.gz inceptionv1_quant.io.npz $TRACT_RUN $MODELS/inceptionv1_quant.nnef.tar.gz \ --nnef-tract-core \ --input-facts-from-bundle $MODELS/inceptionv1_quant.io.npz -O \ run \ --input-from-bundle $MODELS/inceptionv1_quant.io.npz \ --allow-random-input \ --assert-output-bundle $MODELS/inceptionv1_quant.io.npz echo $WHITE audio $NC $CACHE_FILE ARM-ML-KWS-CNN-M.pb $TRACT_RUN $MODELS/ARM-ML-KWS-CNN-M.pb \ -O -i 49,10,f32 --partial \ --input-node Mfcc \ run -q \ --allow-random-input $CACHE_FILE 
#!/bin/sh
# Build the linalg cost-model dataset generator for a musl target and package it
# as a minion task tarball. $1: architecture (aarch64|armv7), $2: task id suffix.

set -ex

ARCH=$1
ID=$2

case $ARCH in
    aarch64)
        MUSL_TRIPLE=aarch64-linux-musl
        RUST_TRIPLE=aarch64-unknown-linux-musl
        PLATFORM=aarch64-unknown-linux-musl
        ;;
    armv7)
        MUSL_TRIPLE=armv7l-linux-musleabihf
        RUST_TRIPLE=armv7-unknown-linux-musleabihf
        PLATFORM=armv7-unknown-linux-musl
        ;;
    *)
        # `exit` only accepts a numeric status; print the message, then fail.
        echo "Can't build with musl for $ARCH" >&2
        exit 1
        ;;
esac

rustup update
rustup target add $RUST_TRIPLE

# Toolchain mirrored on S3 (musl.cc is flaky).
#curl -s https://musl.cc/${MUSL_TRIPLE}-cross.tgz | tar zx
curl -s https://s3.amazonaws.com/tract-ci-builds/toolchains/${MUSL_TRIPLE}-cross.tgz | tar zx
MUSL_BIN=`pwd`/${MUSL_TRIPLE}-cross/bin
export PATH=$MUSL_BIN:$PATH
export TARGET_CC=$MUSL_BIN/${MUSL_TRIPLE}-gcc

# Derive the CARGO_TARGET_<TRIPLE>_CC / _LINKER env var names from the triple.
RUST_TRIPLE_ENV=$(echo $RUST_TRIPLE | tr 'a-z-' 'A-Z_')
export CARGO_TARGET_${RUST_TRIPLE_ENV}_CC=$TARGET_CC
export CARGO_TARGET_${RUST_TRIPLE_ENV}_LINKER=$TARGET_CC

( cd linalg/cost_model ; cargo build --target $RUST_TRIPLE --release )

# Assemble the minion task bundle: binary + vars + entrypoint.
TASK_NAME=cost-model-dataset-$ID
mkdir $TASK_NAME
mv linalg/cost_model/target/${RUST_TRIPLE}/release/cost_model $TASK_NAME
echo "export TIMEOUT=$((86400*4))" > $TASK_NAME/vars
echo "#!/bin/sh" > $TASK_NAME/entrypoint.sh
echo "mkdir product" >> $TASK_NAME/entrypoint.sh
echo "./cost_model ds --size 10000 product/$TASK_NAME.txt" >> $TASK_NAME/entrypoint.sh
# echo "./cost_model ds --size 2000 -k 128 -n 16 product/$TASK_NAME-small-k-tiny-n.txt" >> $TASK_NAME/entrypoint.sh
# echo "./cost_model ds --size 5000 -m 1-512 -k 16,64,256 -n 1-20 product/$TASK_NAME-multiple-k-tiny-n.txt" >> $TASK_NAME/entrypoint.sh
# echo "./cost_model ds --size 1000 -m 1-512 -k 256,1024 -n 1-512 product/$TASK_NAME-bigmn" >> $TASK_NAME/entrypoint.sh
chmod +x $TASK_NAME/entrypoint.sh
tar czf $TASK_NAME.tgz $TASK_NAME

if [ -n "$AWS_ACCESS_KEY_ID" ]
then
    aws s3 cp $TASK_NAME.tgz s3://tract-ci-builds/tasks/$PLATFORM/$TASK_NAME.tgz
fi
$ROOT/.travis/ci-system-setup.sh which cargo-dinghy || ( mkdir -p /tmp/cargo-dinghy if [ `arch` = x86_64 -o `arch` = i386 -o `arch` = arm64 ] then cd /tmp/cargo-dinghy if [ `uname` = "Darwin" ] then NAME=macos else NAME=linux fi VERSION=0.8.0 wget -q https://github.com/snipsco/dinghy/releases/download/$VERSION/cargo-dinghy-$NAME-$VERSION.tgz -O cargo-dinghy.tgz tar vzxf cargo-dinghy.tgz --strip-components 1 mv cargo-dinghy $HOME/.cargo/bin else cargo install cargo-dinghy fi ) if [ -z "$PLATFORM" -a -n "$1" ] then PLATFORM=$1 fi case "$PLATFORM" in "raspbian") [ -e $HOME/cached/raspitools ] || git clone --depth 1 https://github.com/raspberrypi/tools $HOME/cached/raspitools TOOLCHAIN=$HOME/cached/raspitools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf export RUSTC_TRIPLE=arm-unknown-linux-gnueabihf rustup target add $RUSTC_TRIPLE echo "[platforms.$PLATFORM]\nrustc_triple='$RUSTC_TRIPLE'\ntoolchain='$TOOLCHAIN'" > .dinghy.toml cargo dinghy --platform $PLATFORM build --release -p tract-cli -p example-tensorflow-mobilenet-v2 -p tract-ffi ;; "aarch64-linux-android"|"armv7-linux-androideabi"|"i686-linux-android"|"x86_64-linux-android") case "$PLATFORM" in "aarch64-linux-android") ANDROID_CPU=aarch64 RUSTC_TRIPLE=aarch64-linux-android ;; "armv7-linux-androideabi") ANDROID_CPU=armv7 RUSTC_TRIPLE=armv7-linux-androideabi ;; "i686-linux-android") ANDROID_CPU=i686 RUSTC_TRIPLE=i686-linux-android ;; "x86_64-linux-android") ANDROID_CPU=x86_64 RUSTC_TRIPLE=x86_64-linux-android ;; esac export TARGET_AR=ar if [ -e /usr/local/lib/android/sdk/ndk-bundle ] then export ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle else export ANDROID_SDK_HOME=$HOME/cached/android-sdk [ -e $ANDROID_SDK_HOME ] || ./.travis/android-ndk.sh fi rustup target add $RUSTC_TRIPLE cargo dinghy --platform auto-android-$ANDROID_CPU build -p tract-linalg -p tract-ffi ;; "aarch64-apple-ios") rustup target add aarch64-apple-ios cargo dinghy --platform auto-ios-aarch64 check -p tract-linalg -p tract-ffi ;; 
"aarch64-apple-darwin" | "x86_64-unknown-linux-gnu") RUSTC_TRIPLE=$PLATFORM rustup target add $RUSTC_TRIPLE cargo build --target $RUSTC_TRIPLE -p tract-cli --release ;; "aarch64-unknown-linux-gnu-stretch" | "armv7-unknown-linux-gnueabihf-stretch" | "x86_64-unknown-linux-gnu-stretch") INNER_PLATFORM=${PLATFORM%-stretch} (cd .travis/docker-debian-stretch; docker build --tag debian-stretch .) docker run -v `pwd`:/tract -w /tract \ -e CI=true \ -e SKIP_QEMU_TEST=skip \ -e PLATFORM=$INNER_PLATFORM debian-stretch \ ./.travis/cross.sh sudo chown -R `whoami` . export RUSTC_TRIPLE=$INNER_PLATFORM ;; "aarch64-unknown-linux-gnu" | "armv6vfp-unknown-linux-gnueabihf" | "armv7-unknown-linux-gnueabihf" | \ "aarch64-unknown-linux-musl" | "armv7-unknown-linux-musl" | "cortexa53-unknown-linux-musl" ) case "$PLATFORM" in "aarch64-unknown-linux-gnu") export ARCH=aarch64 export QEMU_ARCH=aarch64 export LIBC_ARCH=arm64 export TRACT_CPU_AARCH64_KIND=a55 export RUSTC_TRIPLE=$ARCH-unknown-linux-gnu export DEBIAN_TRIPLE=$ARCH-linux-gnu ;; "armv6vfp-unknown-linux-gnueabihf") export ARCH=armv6vfp export LIBC_ARCH=armhf export QEMU_ARCH=arm export QEMU_OPTS="-cpu cortex-a15" export RUSTC_TRIPLE=arm-unknown-linux-gnueabihf export DEBIAN_TRIPLE=arm-linux-gnueabihf ;; "armv7-unknown-linux-gnueabihf") export ARCH=armv7 export QEMU_ARCH=arm export LIBC_ARCH=armhf export QEMU_OPTS="-cpu cortex-a15" export RUSTC_TRIPLE=armv7-unknown-linux-gnueabihf export DEBIAN_TRIPLE=arm-linux-gnueabihf export TARGET_CC=$DEBIAN_TRIPLE-gcc export TRACT_CPU_ARM32_NEON=true export DINGHY_TEST_ARGS="--env TRACT_CPU_ARM32_NEON=true" ;; "aarch64-unknown-linux-musl") export ARCH=aarch64 export QEMU_ARCH=aarch64 export LIBC_ARCH=arm64 export RUSTC_TRIPLE=$ARCH-unknown-linux-musl export DEBIAN_TRIPLE=$ARCH-linux-gnu export TRACT_CPU_AARCH64_KIND=a55 export CUSTOM_TC=`pwd`/aarch64-linux-musl-cross [ -d "$CUSTOM_TC" ] || curl -s https://s3.amazonaws.com/tract-ci-builds/toolchains/aarch64-linux-musl-cross.tgz | tar zx ;; 
"cortexa53-unknown-linux-musl") export ARCH=aarch64 export QEMU_ARCH=aarch64 export LIBC_ARCH=arm64 export RUSTC_TRIPLE=$ARCH-unknown-linux-musl export DEBIAN_TRIPLE=$ARCH-linux-gnu export TRACT_CPU_AARCH64_KIND=a53 export QEMU_OPTS="-cpu cortex-a53" export CUSTOM_TC=`pwd`/aarch64-linux-musl-cross [ -d "$CUSTOM_TC" ] || curl -s https://s3.amazonaws.com/tract-ci-builds/toolchains/aarch64-linux-musl-cross.tgz | tar zx ;; "armv7-unknown-linux-musl") export ARCH=armv7 export QEMU_ARCH=arm export LIBC_ARCH=armhf export RUSTC_TRIPLE=armv7-unknown-linux-musleabihf export DEBIAN_TRIPLE=arm-linux-gnueabihf export CUSTOM_TC=`pwd`/armv7l-linux-musleabihf-cross export TRACT_CPU_ARM32_NEON=true export DINGHY_TEST_ARGS="--env TRACT_CPU_ARM32_NEON=true" [ -d "$CUSTOM_TC" ] || curl -s https://s3.amazonaws.com/tract-ci-builds/toolchains/armv7l-linux-musleabihf-cross.tgz | tar zx export TARGET_CFLAGS="-mfpu=neon" ;; *) echo "unsupported platform $PLATFORM" exit 1 ;; esac mkdir -p $ROOT/target/$RUSTC_TRIPLE echo "[platforms.$PLATFORM]\nrustc_triple='$RUSTC_TRIPLE'" > .dinghy.toml if [ -n "$DEBIAN_TRIPLE" ] then PACKAGES="$PACKAGES binutils-$DEBIAN_TRIPLE gcc-$DEBIAN_TRIPLE libc6-dev-$LIBC_ARCH-cross" echo "deb_multiarch='$DEBIAN_TRIPLE'" >> .dinghy.toml fi if [ -n "$CUSTOM_TC" ] then echo "toolchain='$CUSTOM_TC'" >> .dinghy.toml fi echo "[script_devices.qemu-$PLATFORM]\nplatform='$PLATFORM'\npath='$ROOT/target/$RUSTC_TRIPLE/qemu-$PLATFORM'" >> .dinghy.toml echo "#!/bin/sh\nexe=\$1\nshift\n/usr/bin/qemu-$QEMU_ARCH $QEMU_OPTS -L /usr/$DEBIAN_TRIPLE/ \$exe --test-threads 1 \"\$@\"" > $ROOT/target/$RUSTC_TRIPLE/qemu-$PLATFORM chmod +x $ROOT/target/$RUSTC_TRIPLE/qemu-$PLATFORM DINGHY_TEST_ARGS="$DINGHY_TEST_ARGS --env PROPTEST_MAX_SHRINK_ITERS=100000000" $SUDO apt-get -y install --no-install-recommends qemu-system-arm qemu-user libssl-dev pkg-config $PACKAGES rustup target add $RUSTC_TRIPLE if [ -z "$SKIP_QEMU_TEST" ] then qemu-$QEMU_ARCH --version cargo dinghy --platform $PLATFORM 
$DINGHY_TEST_ARGS test --profile opt-no-lto -p tract-linalg -- --nocapture cargo dinghy --platform $PLATFORM $DINGHY_TEST_ARGS test --profile opt-no-lto -p tract-core fi cargo dinghy --platform $PLATFORM $DINGHY_TEST_ARGS check -p tract-ffi # keep lto for these two are they're going to devices. cargo dinghy --platform $PLATFORM build --release -p tract-cli -p example-tensorflow-mobilenet-v2 ;; wasm32-wasi) PLATFORM=wasm32-wasip1 wasmtime --version rustup target add $PLATFORM cargo check --target $PLATFORM --features getrandom-js -p tract-onnx -p tract-tensorflow RUSTFLAGS='-C target-feature=+simd128' CARGO_TARGET_WASM32_WASIP1_RUNNER=wasmtime \ cargo test --target=$PLATFORM -p tract-linalg -p tract-core -p test-unit-core ;; wasm32-*) rustup target add $PLATFORM cargo check --target $PLATFORM --features getrandom-js -p tract-onnx -p tract-tensorflow ;; *) echo "Don't know what to do for platform: $PLATFORM" exit 2 ;; esac if [ -e "target/$RUSTC_TRIPLE/release/tract" ] then export RUSTC_TRIPLE TASK_NAME=`.travis/make_bundle.sh` echo bench task: $TASK_NAME if [ -n "$AWS_ACCESS_KEY_ID" ] then aws s3 cp $TASK_NAME.tgz s3://tract-ci-builds/tasks/$PLATFORM/$TASK_NAME.tgz fi fi ================================================ FILE: .travis/debug-tests.sh ================================================ #!/bin/sh set -ex if [ -z "$CACHEDIR" ] then CACHEDIR=`dirname $0`/../.cached fi # useful as debug_asserts will come into play cargo test -p tract-core cargo test -p test-onnx-core -p test-nnef-cycle -p test-unit-core ================================================ FILE: .travis/docker-debian-stretch/Dockerfile ================================================ FROM debian:stretch COPY sources.list /etc/apt/sources.list ================================================ FILE: .travis/docker-debian-stretch/sources.list ================================================ deb http://archive.debian.org/debian/ stretch contrib main non-free deb http://archive.debian.org/debian 
stretch-backports main deb http://archive.debian.org/debian-security stretch/updates main ================================================ FILE: .travis/examples.sh ================================================ #!/bin/sh WHITE='\033[1;37m' NC='\033[0m' # No Color set -e ROOT=$(dirname $(dirname $(realpath $0))) . $ROOT/.travis/ci-system-setup.sh for t in `find examples -name ci.sh` do df -h ex=$(dirname $t) echo ::group:: $ex echo $WHITE $ex $NC ( cd $ex ; sh ./ci.sh ) if [ -n "$CI" ] then cargo clean fi echo ::endgroup:: done ================================================ FILE: .travis/llm-expectations-541 ================================================ Qwen--Qwen3-1.7B-f16f16.p0s100.arm64.cpu 0.96 Qwen--Qwen3-1.7B-f16f16.p0s100.arm64.metal 0.96 Qwen--Qwen3-1.7B-f16f16.p0s100.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-f16f16.p0s100.x86_64.cuda 0.99 Qwen--Qwen3-1.7B-f16f16.p50s50.arm64.cpu 0.97 Qwen--Qwen3-1.7B-f16f16.p50s50.arm64.metal 0.97 Qwen--Qwen3-1.7B-f16f16.p50s50.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-f16f16.p50s50.x86_64.cuda 0.99 Qwen--Qwen3-1.7B-f16f16.p99s1.arm64.cpu 0.99 Qwen--Qwen3-1.7B-f16f16.p99s1.arm64.metal 0.99 Qwen--Qwen3-1.7B-f16f16.p99s1.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-f16f16.p99s1.x86_64.cuda 0.99 Qwen--Qwen3-1.7B-q40ef16.p0s100.arm64.cpu 0.92 Qwen--Qwen3-1.7B-q40ef16.p0s100.arm64.metal 0.98 Qwen--Qwen3-1.7B-q40ef16.p0s100.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-q40ef16.p0s100.x86_64.cuda 0.92 Qwen--Qwen3-1.7B-q40ef16.p50s50.arm64.cpu 0.96 Qwen--Qwen3-1.7B-q40ef16.p50s50.arm64.metal 0.99 Qwen--Qwen3-1.7B-q40ef16.p50s50.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-q40ef16.p50s50.x86_64.cuda 0.98 Qwen--Qwen3-1.7B-q40ef16.p99s1.arm64.cpu 0.97 Qwen--Qwen3-1.7B-q40ef16.p99s1.arm64.metal 0.99 Qwen--Qwen3-1.7B-q40ef16.p99s1.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-q40ef16.p99s1.x86_64.cuda 0.96 Qwen--Qwen3-8B-f16f16.p0s100.arm64.cpu 0.94 Qwen--Qwen3-8B-f16f16.p0s100.arm64.metal 0.95 Qwen--Qwen3-8B-f16f16.p0s100.x86_64.cpu 0.99 Qwen--Qwen3-8B-f16f16.p0s100.x86_64.cuda 0.99 
Qwen--Qwen3-8B-f16f16.p50s50.arm64.cpu 0.94 Qwen--Qwen3-8B-f16f16.p50s50.arm64.metal 0.95 Qwen--Qwen3-8B-f16f16.p50s50.x86_64.cpu 0.99 Qwen--Qwen3-8B-f16f16.p50s50.x86_64.cuda 0.99 Qwen--Qwen3-8B-f16f16.p99s1.arm64.cpu 0.96 Qwen--Qwen3-8B-f16f16.p99s1.arm64.metal 0.99 Qwen--Qwen3-8B-f16f16.p99s1.x86_64.cpu 0.99 Qwen--Qwen3-8B-f16f16.p99s1.x86_64.cuda 0.99 Qwen--Qwen3-8B-q40ef16.p0s100.arm64.cpu 0.86 Qwen--Qwen3-8B-q40ef16.p0s100.arm64.metal 0.97 Qwen--Qwen3-8B-q40ef16.p0s100.x86_64.cpu 0.99 Qwen--Qwen3-8B-q40ef16.p0s100.x86_64.cuda 0.96 Qwen--Qwen3-8B-q40ef16.p50s50.arm64.cpu 0.98 Qwen--Qwen3-8B-q40ef16.p50s50.arm64.metal 0.99 Qwen--Qwen3-8B-q40ef16.p50s50.x86_64.cpu 0.99 Qwen--Qwen3-8B-q40ef16.p50s50.x86_64.cuda 0.99 Qwen--Qwen3-8B-q40ef16.p99s1.arm64.cpu 0.96 Qwen--Qwen3-8B-q40ef16.p99s1.arm64.metal 0.98 Qwen--Qwen3-8B-q40ef16.p99s1.x86_64.cpu 0.99 Qwen--Qwen3-8B-q40ef16.p99s1.x86_64.cuda 0.96 apple--OpenELM-270M-f16f16.p0s100.arm64.cpu 0.98 apple--OpenELM-270M-f16f16.p0s100.arm64.metal 0.99 apple--OpenELM-270M-f16f16.p0s100.x86_64.cpu 0.99 apple--OpenELM-270M-f16f16.p0s100.x86_64.cuda 0.98 apple--OpenELM-270M-f16f16.p50s50.arm64.cpu 0.92 apple--OpenELM-270M-f16f16.p50s50.arm64.metal 0.92 apple--OpenELM-270M-f16f16.p50s50.x86_64.cpu 0.99 apple--OpenELM-270M-f16f16.p50s50.x86_64.cuda 0.99 apple--OpenELM-270M-f16f16.p99s1.arm64.cpu 0.97 apple--OpenELM-270M-f16f16.p99s1.arm64.metal 0.99 apple--OpenELM-270M-f16f16.p99s1.x86_64.cpu 0.99 apple--OpenELM-270M-f16f16.p99s1.x86_64.cuda 0.99 apple--OpenELM-270M-q40ef16.p0s100.arm64.cpu 0.99 apple--OpenELM-270M-q40ef16.p0s100.arm64.metal 0.99 apple--OpenELM-270M-q40ef16.p0s100.x86_64.cpu 0.99 apple--OpenELM-270M-q40ef16.p0s100.x86_64.cuda 0.95 apple--OpenELM-270M-q40ef16.p50s50.arm64.cpu 0.97 apple--OpenELM-270M-q40ef16.p50s50.arm64.metal 0.95 apple--OpenELM-270M-q40ef16.p50s50.x86_64.cpu 0.99 apple--OpenELM-270M-q40ef16.p50s50.x86_64.cuda 0.94 apple--OpenELM-270M-q40ef16.p99s1.arm64.cpu 0.99 
apple--OpenELM-270M-q40ef16.p99s1.arm64.metal 0.99 apple--OpenELM-270M-q40ef16.p99s1.x86_64.cpu 0.99 apple--OpenELM-270M-q40ef16.p99s1.x86_64.cuda 0.89 meta-llama--Llama-3.1-8B-Instruct-f16f16.p0s100.arm64.cpu 0.96 meta-llama--Llama-3.1-8B-Instruct-f16f16.p0s100.arm64.metal 0.92 meta-llama--Llama-3.1-8B-Instruct-f16f16.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.1-8B-Instruct-f16f16.p0s100.x86_64.cuda 0.99 meta-llama--Llama-3.1-8B-Instruct-f16f16.p50s50.arm64.cpu 0.95 meta-llama--Llama-3.1-8B-Instruct-f16f16.p50s50.arm64.metal 0.95 meta-llama--Llama-3.1-8B-Instruct-f16f16.p50s50.x86_64.cpu 0.98 meta-llama--Llama-3.1-8B-Instruct-f16f16.p50s50.x86_64.cuda 0.98 meta-llama--Llama-3.1-8B-Instruct-f16f16.p99s1.arm64.cpu 0.97 meta-llama--Llama-3.1-8B-Instruct-f16f16.p99s1.arm64.metal 0.99 meta-llama--Llama-3.1-8B-Instruct-f16f16.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.1-8B-Instruct-f16f16.p99s1.x86_64.cuda 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p0s100.arm64.cpu 0.93 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p0s100.arm64.metal 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p0s100.x86_64.cpu 0.97 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p0s100.x86_64.cuda 0.97 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p50s50.arm64.cpu 0.93 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p50s50.arm64.metal 0.98 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p50s50.x86_64.cpu 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p50s50.x86_64.cuda 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p99s1.arm64.cpu 0.97 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p99s1.arm64.metal 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p99s1.x86_64.cuda 0.97 meta-llama--Llama-3.2-1B-Instruct-f16f16.p0s100.arm64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f16f16.p0s100.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-f16f16.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f16f16.p0s100.x86_64.cuda 0.99 
meta-llama--Llama-3.2-1B-Instruct-f16f16.p50s50.arm64.cpu 0.96 meta-llama--Llama-3.2-1B-Instruct-f16f16.p50s50.arm64.metal 0.96 meta-llama--Llama-3.2-1B-Instruct-f16f16.p50s50.x86_64.cpu 0.98 meta-llama--Llama-3.2-1B-Instruct-f16f16.p50s50.x86_64.cuda 0.97 meta-llama--Llama-3.2-1B-Instruct-f16f16.p99s1.arm64.cpu 0.97 meta-llama--Llama-3.2-1B-Instruct-f16f16.p99s1.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-f16f16.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f16f16.p99s1.x86_64.cuda 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p0s100.arm64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p0s100.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p0s100.x86_64.cuda 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p50s50.arm64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p50s50.arm64.metal 0.96 meta-llama--Llama-3.2-1B-Instruct-f32f32.p50s50.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p50s50.x86_64.cuda 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p99s1.arm64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p99s1.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p99s1.x86_64.cuda 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p0s100.arm64.cpu 0.97 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p0s100.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p0s100.x86_64.cuda 0.98 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p50s50.arm64.cpu 0.86 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p50s50.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p50s50.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p50s50.x86_64.cuda 0.94 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p99s1.arm64.cpu 0.98 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p99s1.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p99s1.x86_64.cpu 0.99 
meta-llama--Llama-3.2-1B-Instruct-q40ef16.p99s1.x86_64.cuda 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p0s100.arm64.cpu 0.98 meta-llama--Llama-3.2-3B-Instruct-f16f16.p0s100.arm64.metal 0.97 meta-llama--Llama-3.2-3B-Instruct-f16f16.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p0s100.x86_64.cuda 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p50s50.arm64.cpu 0.96 meta-llama--Llama-3.2-3B-Instruct-f16f16.p50s50.arm64.metal 0.98 meta-llama--Llama-3.2-3B-Instruct-f16f16.p50s50.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p50s50.x86_64.cuda 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p99s1.arm64.cpu 0.96 meta-llama--Llama-3.2-3B-Instruct-f16f16.p99s1.arm64.metal 0.98 meta-llama--Llama-3.2-3B-Instruct-f16f16.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p99s1.x86_64.cuda 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p0s100.arm64.cpu 0.96 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p0s100.arm64.metal 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p0s100.x86_64.cuda 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p50s50.arm64.cpu 0.97 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p50s50.arm64.metal 0.98 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p50s50.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p50s50.x86_64.cuda 0.97 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p99s1.arm64.cpu 0.93 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p99s1.arm64.metal 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p99s1.x86_64.cuda 0.94 ================================================ FILE: .travis/make_bundle.sh ================================================ #!/bin/sh set -ex TRAVIS_COMMIT=${GITHUB_SHA:-dummy-commit-id} BRANCH=$(echo $GITHUB_HEAD_REF | tr '/' '_') BRANCH=${BRANCH:-main} PLATFORM=${PLATFORM:-dummy-platform} dates=`date -u +"%Y%m%dT%H%M%S %s"` date_iso=`echo $dates | cut -f 1 -d ' '` timestamp=`echo 
$dates | cut -f 2 -d ' '` TASK_NAME=tract-$date_iso mkdir -p $TASK_NAME echo "export TASK_NAME=$TASK_NAME" > $TASK_NAME/vars echo "export TRAVIS_COMMIT=$TRAVIS_COMMIT" >> $TASK_NAME/vars TRAVIS_BRANCH_SANE=`echo $BRANCH | tr '/' '_'` echo "export TRAVIS_BRANCH_SANE=$TRAVIS_BRANCH_SANE" >> $TASK_NAME/vars echo "export DATE_ISO=$date_iso" >> $TASK_NAME/vars echo "export TIMESTAMP=$timestamp" >> $TASK_NAME/vars echo "export PLATFORM=$PLATFORM" >> $TASK_NAME/vars if which gstat > /dev/null then STAT=gstat else STAT=stat fi touch sizes for bin in example-tensorflow-mobilenet-v2 tract do if [ -e target/$RUSTC_TRIPLE/release/$bin ] then binary_size_cli=$($STAT -c "%s" target/$RUSTC_TRIPLE/release/$bin) token=$(echo $bin | tr '-' '_') if [ "$bin" = "tract" ] then token=cli fi echo binary_size.$token $binary_size_cli >> sizes fi done cp target/$RUSTC_TRIPLE/release/tract $TASK_NAME cp sizes $TASK_NAME cp .travis/bundle-entrypoint.sh $TASK_NAME/entrypoint.sh tar czf $TASK_NAME.tgz $TASK_NAME/ echo $TASK_NAME ================================================ FILE: .travis/minion.sh ================================================ #!/bin/bash set -ex . $HOME/.minionrc exec 200>$LOCKFILE || exit 1 flock -n 200 || { echo "WARN: flock() failed." >&2; exit 0; } mkdir -p $WORKDIR/taskdone/ for task in `aws s3 ls $S3PATH_TASKS/$PLATFORM/ | awk '{ print $4; }'` do cd $HOME task_name="${task%.tgz}" if [ -e $WORKDIR/taskdone/$task_name ] then continue fi echo considering task $task rm -rf $WORKDIR/current mkdir -p $WORKDIR/current cd $WORKDIR/current aws s3 cp s3://$S3PATH_TASKS/$PLATFORM/$task . tar zxf $task . 
$task_name/vars cd $task_name ( ./entrypoint.sh 2> stderr.log > stdout.log || true ) gzip stderr.log gzip stdout.log aws s3 cp stderr.log.gz s3://$S3PATH_RESULTS/$MINION_ID/$task_name/stderr.log.gz aws s3 cp stdout.log.gz s3://$S3PATH_RESULTS/$MINION_ID/$task_name/stdout.log.gz touch $WORKDIR/taskdone/$task_name cat metrics | sed "s/^/$GRAPHITE_PREFIX.$PLATFORM.$MINION_ID.$TRAVIS_BRANCH_SANE./;s/$/ $TIMESTAMP/" \ | tr '-' '_' > graphite if nc --version then # GNU export GRAPHITE_HOST export GRAPHITE_PORT cat graphite | while read line do echo $line | nc -c -w 1 $GRAPHITE_HOST $GRAPHITE_PORT done else # BSD nc -q 5 $GRAPHITE_HOST $GRAPHITE_PORT < graphite fi done sleep 1 echo "DONE" ================================================ FILE: .travis/minionrc ================================================ MINION_ID= LOCKFILE=/tmp/minion-lock PLATFORM=raspbian GRAPHITE_HOST=graphite-proxy.snips.net GRAPHITE_PORT=2003 GRAPHITE_PREFIX=tract S3PATH_TASKS=tract-ci-builds/tasks S3PATH_LOGS=tract-ci-builds/logs S3PATH_RESULTS=tract-ci-builds/logs WORKDIR=$HOME/tract-minion CACHEDIR=$WORKDIR/cache ================================================ FILE: .travis/native.sh ================================================ #!/bin/sh set -ex if [ -z "$RUSTUP_TOOLCHAIN" ] then export RUSTUP_TOOLCHAIN=1.91.0 fi rustup update cargo update cargo check --all-targets --workspace --exclude test-tflite --exclude test-metal --exclude tract-metal ./.travis/onnx-tests.sh ./.travis/regular-tests.sh ./.travis/test-harness.sh if [ -n "$CI" ] then cargo clean fi if [ `uname` = "Linux" ] then ./.travis/tflite.sh fi if [ -n "$CI" ] then cargo clean fi if nvidia-smi > /dev/null 2>&1 then cargo test -p tract-cuda --lib cargo test -p test-cuda fi ./.travis/cli-tests.sh ================================================ FILE: .travis/onnx-tests.sh ================================================ #!/bin/sh set -ex ROOT=$(dirname $(realpath $0))/.. . 
$ROOT/.travis/ci-system-setup.sh opset=onnx_"${1:-1_13_0}" cargo -q test -p test-unit-core $CARGO_EXTRA -q cargo -q test -p test-onnx-core $CARGO_EXTRA -q --no-default-features --features $opset cargo -q test -p test-nnef-cycle $CARGO_EXTRA -q --no-default-features ================================================ FILE: .travis/regular-tests.sh ================================================ #!/bin/sh set -e set -x cd $(dirname $0) ./test-published-crates.sh if [ -n "$CI" ] then cargo clean fi ./test-rt.sh if [ -n "$CI" ] then cargo clean fi ================================================ FILE: .travis/run-bundle.sh ================================================ #!/bin/sh set -ex BUNDLE_NAME=$1 tar zxf $BUNDLE_NAME.tgz ( cd $BUNDLE_NAME . ./vars ./entrypoint.sh ) # rm -rf "$BUNDLE_NAME" "$BUNDLE_NAME.tgz" ================================================ FILE: .travis/run_all.sh ================================================ #!/bin/sh set -ex `dirname $0`/native.sh cd `dirname $0`/../examples for i in * do (cd $i; cargo test --release) done ================================================ FILE: .travis/setup-sccache.sh ================================================ #!/bin/sh set -ex export SCCACHE_DIR=$HOME/.cache/sccache export SCCACHE_CACHE_SIZE=2G if [ -n "$GITHUB_ENV" ] then echo "SCCACHE_DIR=$HOME/.cache/sccache" >> $GITHUB_ENV echo "SCCACHE_CACHE_SIZE=2G" >> $GITHUB_ENV echo "RUSTC_WRAPPER=sccache" >> $GITHUB_ENV echo "$HOME/.local/bin" >> $GITHUB_PATH fi LINK=https://github.com/mozilla/sccache/releases/download SCCACHE_VERSION=v0.10.0 echo $HOME if [ `uname` = "Linux" ] then SCCACHE_FILE=sccache-$SCCACHE_VERSION-x86_64-unknown-linux-musl else SCCACHE_FILE=sccache-$SCCACHE_VERSION-x86_64-apple-darwin fi mkdir -p $SCCACHE_DIR mkdir -p $HOME/.local/bin for i in 1 2 3 4 5 do curl -L "$LINK/$SCCACHE_VERSION/$SCCACHE_FILE.tar.gz" | tar xz && break sleep 15 done mv -f $SCCACHE_FILE/sccache $HOME/.local/bin/sccache chmod +x $HOME/.local/bin/sccache 
================================================ FILE: .travis/test-harness.sh ================================================ #!/bin/sh WHITE='\033[1;37m' NC='\033[0m' # No Color if [ -e /proc/cpuinfo ] then grep "^flags" /proc/cpuinfo | head -1 | \ grep --color=always '\(s\?sse[0-9_]*\|fma\|f16c\|avx[^ ]*\)' fi set -x ROOT=$(dirname $0)/.. . $ROOT/.travis/ci-system-setup.sh set -e if [ `arch` = "x86_64" -a "$RUST_VERSION" = "stable" ] then ALL_FEATURES=--all-features fi set +x cargo -q test $CARGO_EXTRA -q -p tract cargo -q test $CARGO_EXTRA -q --profile opt-no-lto -p core-proptest-pulse $ALL_FEATURES cargo -q test $CARGO_EXTRA -q --profile opt-no-lto -p nnef-inceptionv3 $ALL_FEATURES cargo -q test $CARGO_EXTRA -q --profile opt-no-lto -p tf-inceptionv3 $ALL_FEATURES cargo -q test $CARGO_EXTRA -q --profile opt-no-lto -p tf-mobilenet-v2 $ALL_FEATURES cargo -q test $CARGO_EXTRA -q --profile opt-no-lto -p tfl-mobilenet-v2-q $ALL_FEATURES ================================================ FILE: .travis/test-llm.sh ================================================ #!/bin/bash set -e set -o pipefail export LC_ALL=C ROOT=$(dirname $(dirname $(realpath $0))) . 
$ROOT/.travis/ci-system-setup.sh model=$1 q=$2 device=$3 if [ -z "$device" ] then device=cpu fi generation=541 if [ "$model" = "all" ] then for m in \ openelm-270M \ llama-3.2-1B-instruct \ llama-3.2-3B-instruct \ llama-3.1-8B-instruct \ qwen3-1.7B \ qwen3-8B do $0 $m $2 $device done exit 0 fi model=$(echo $model | tr 'A-Z' 'a-z' | tr -d "_.-") for m in \ apple--OpenELM-270M \ meta-llama--Llama-3.2-1B-Instruct \ meta-llama--Llama-3.2-3B-Instruct \ meta-llama--Llama-3.1-8B-Instruct \ Qwen--Qwen3-1.7B \ Qwen--Qwen3-8B do norm=$(echo $m | tr "A-Z" "a-z" | tr -d "_.-") if [[ "$norm" == *"$model"* ]]; then model_id=$m fi done if [ -z "$model_id" ] then echo "No model matched" fi if [ "$q" = "all" ] then for q in q40ef16 f16f16 f32f32 do $0 $1 $q $device done exit 0 fi id=$model_id-$q if which gstat > /dev/null then STAT=gstat else STAT=stat fi nnef=llm/$generation/$id/$id.nnef.tgz $CACHE_FILE $nnef if [ -e $MODELS/$nnef ] then size=$($STAT -c %s $MODELS/$nnef) else size=$(curl -s -I $MODELS/$nnef | grep Content-Length | cut -d " " -f 2 | tr -cd 0123456789) fi if which nvidia-smi > /dev/null then vram=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | awk '{print $1*1024*1024}') if [ $vram -lt $size ] then echo "::warning::Skipping this test, not enough VRAM." exit 0 fi fi $TRACT_RUN -v --nnef-tract-transformers $MODELS/$nnef -O --readings --assert-maximal-mm-quality-cost 0 $TRACT_EXTRA_ARGS dump -q alloc_max=$(cat readings.out | tail -n +2 | awk '{print $10-$11}' | sort -n | tail -1) ratio=$((alloc_max * 100 / size)) echo " ###########################################" echo " Alloc max to model size ratio: ${ratio}%." echo " ###########################################" limit=125 if [ $ratio -gt $limit ] then echo "RSZ max is ${ratio}% the size of the unzipped model!" 
exit 1 fi for t in p0s100 p50s50 p99s1 do npz=llm/$generation/$id/$id.$t.io.npz $CACHE_FILE $npz key=$id.$t.$(arch).$device expectations="$ROOT/.travis/llm-expectations-541" echo echo " Key: $key" echo case $device in cuda) DEVICE="--cuda" GPU_ASSERT="--assert-op-only Cuda*,Gpu*,DeviceSync*,Const,Source,Range,Gather" ;; metal) DEVICE="--metal" GPU_ASSERT="--assert-op-only Metal*,Gpu*,DeviceSync*,Const,Source,Range,Gather" ;; *) GPU_ASSERT="" ;; esac if [ -n "$RESET" ] then $TRACT_RUN -v $MODELS/$nnef $TRACT_EXTRA_ARGS \ --llm --transform unfold-kv-cache -O $DEVICE run --prompt-chunk-size 60 --allow-missing-outputs \ --input-from-npz $MODELS/$npz \ --assert-output-bundle $MODELS/$npz \ --assert-llm-rbo 0.0 \ $approx --allow-float-casts $GPU_ASSERT 2>&1 | tee output.txt found=$(cat output.txt | perl -MPOSIX=floor -ne 'printf("%.2f\n", floor($1 * 100) / 100) if /LLM RBO:\s+([\d.]+)/') ( ( grep -v $key $expectations || true) ; echo $key $found) | sort > $expectations.tmp mv $expectations.tmp $expectations elif [ -n "$RELAX" ] then prior=$(grep $key $expectations | cut -f 2 -d ' ') $TRACT_RUN -v $MODELS/$nnef $TRACT_EXTRA_ARGS \ --llm --transform unfold-kv-cache -O $DEVICE run --prompt-chunk-size 60 --allow-missing-outputs \ --input-from-npz $MODELS/$npz \ --assert-output-bundle $MODELS/$npz \ --assert-llm-rbo 0.0 \ $approx --allow-float-casts $GPU_ASSERT 2>&1 | tee output.txt found=$(cat output.txt | perl -MPOSIX=floor -ne 'printf("%.2f\n", floor($1 * 100) / 100) if /LLM RBO:\s+([\d.]+)/') if [ -n "$prior" ] && perl -e 'exit($ARGV[0] <= $ARGV[1] ? 1 : 0)' "$found" "$prior" then found=$prior fi ( ( grep -v $key $expectations || true) ; echo $key $found) | sort > $expectations.tmp mv $expectations.tmp $expectations else # test ! 
expectation=$(grep $key $expectations | cut -f 2 -d ' ') $TRACT_RUN -v $MODELS/$nnef $TRACT_EXTRA_ARGS \ --llm --transform unfold-kv-cache -O $DEVICE run --prompt-chunk-size 60 --allow-missing-outputs \ --input-from-npz $MODELS/$npz \ --assert-output-bundle $MODELS/$npz \ --assert-llm-rbo $expectation \ $approx --allow-float-casts $GPU_ASSERT fi done ================================================ FILE: .travis/test-published-crates.sh ================================================ #!/bin/sh WHITE='\033[1;37m' NC='\033[0m' # No Color if [ -e /proc/cpuinfo ] then grep "^flags" /proc/cpuinfo | head -1 | \ grep --color=always '\(s\?sse[0-9_]*\|fma\|f16c\|avx[^ ]*\)' fi set -x ROOT=$(dirname $0)/.. . $ROOT/.travis/ci-system-setup.sh set -e if [ `arch` = "x86_64" -a "$RUST_VERSION" = "stable" ] then ALL_FEATURES=--all-features fi set +x cargo update echo echo "$WHITE ### tract ### $NC" echo cargo -q test $CARGO_EXTRA -q -p tract for c in data linalg core nnef hir onnx pulse onnx-opl pulse-opl do echo echo "$WHITE ### $c ### $NC" echo cargo -q test $CARGO_EXTRA -q -p tract-$c done if [ `uname` = "Darwin" -a -z "$CI" ] then echo echo "$WHITE ### metal ### $NC" echo cargo -q test $CARGO_EXTRA -q -p tract-metal fi if command -v nvcc >/dev/null 2>&1 && [ -z "$CI" ] then echo echo "$WHITE ### cuda ### $NC" echo cargo -q test -q -p tract-cuda fi $ROOT/api/proxy/ci.sh # doc test are not finding libtensorflow.so if ! cargo -q test $CARGO_EXTRA -q -p tract-tensorflow --lib $ALL_FEATURES then # this crate triggers an incremental bug on nightly. 
cargo clean -p tract-tensorflow cargo -q test $CARGO_EXTRA -q -p tract-tensorflow --lib $ALL_FEATURES fi ================================================ FILE: .travis/test-rt.sh ================================================ #!/bin/sh WHITE='\033[1;37m' NC='\033[0m' # No Color if [ -e /proc/cpuinfo ] then grep "^flags" /proc/cpuinfo | head -1 | \ grep --color=always '\(s\?sse[0-9_]*\|fma\|f16c\|avx[^ ]*\)' fi set -x ROOT=$(dirname $0)/.. . $ROOT/.travis/ci-system-setup.sh set -e if [ `arch` = "x86_64" -a "$RUST_VERSION" = "stable" ] then ALL_FEATURES=--all-features fi set +x cd $ROOT for c in test-rt/test*; do case "$c" in test-rt/test-tflite) echo "$WHITE ### $c ### IGNORED $NC" continue ;; test-rt/test-metal) if [ "$(uname)" != "Darwin" ] || [ -n "$CI" ]; then echo "$WHITE ### $c ### IGNORED $NC" continue fi ;; test-rt/test-cuda) if ! command -v nvcc >/dev/null; then echo "$WHITE ### $c ### IGNORED $NC" continue fi ;; esac echo echo "$WHITE ### $c ### $NC" echo (cd "$c" && cargo test -q $CARGO_EXTRA) if [ -n "$CI" ]; then df -h cargo clean fi done ================================================ FILE: .travis/tf.sh ================================================ #!/bin/sh set -ex if [ -z "$CACHEDIR" ] then CACHEDIR=`dirname $0`/../.cached fi (cd tensorflow; cargo test --release --features conform) ================================================ FILE: .travis/tflite/Dockerfile.tensorflow-aarch64 ================================================ # vim: set syntax=Dockerfile: FROM tensorflow/tensorflow:devel RUN apt-get update ; apt-get upgrade -y RUN apt-get install -y crossbuild-essential-arm64 COPY linux_makefile.inc /tensorflow_src/tensorflow/lite/tools/make/targets/linux_makefile.inc COPY disable_nnapi.patch /tensorflow_src WORKDIR /tensorflow_src RUN patch -p1 < disable_nnapi.patch ================================================ FILE: .travis/tflite/Dockerfile.tensorflow-official-rpi ================================================ # vim: set 
syntax=Dockerfile: FROM tensorflow/tensorflow:nightly-devel RUN apt-get update ; apt-get upgrade -y RUN apt-get -y install git crossbuild-essential-armhf WORKDIR /tensorflow RUN ./tensorflow/lite/tools/make/download_dependencies.sh ================================================ FILE: .travis/tflite/Dockerfile.tensorflow-rpitools ================================================ # vim: set syntax=Dockerfile: FROM tensorflow/tensorflow:nightly-devel RUN apt-get update ; apt-get upgrade -y RUN apt-get -yy install git WORKDIR /tensorflow RUN ./tensorflow/lite/tools/make/download_dependencies.sh RUN git clone https://github.com/raspberrypi/tools /raspitools ENV PATH=/raspitools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/bin:$PATH ================================================ FILE: .travis/tflite/build_tflite_aarch64.sh ================================================ #!/bin/sh set -ex mkdir -p result docker build -f Dockerfile.tensorflow-aarch64 --tag tensorflow-aarch64 . docker run --rm -it \ -v `pwd`/result:/result \ tensorflow-aarch64 \ sh -c " cd /tensorflow_src ; export EXTRA_CXXFLAGS=-flax-vector-conversions export DISABLE_NNAPI=true ./tensorflow/lite/tools/make/download_dependencies.sh make -j 3 -f tensorflow/lite/tools/make/Makefile TARGET=linux TARGET_ARCH=aarch64 ; cp /tensorflow_src/tensorflow/lite/tools/make/gen/linux_aarch64/bin/benchmark_model /result/tflite_benchmark_model_aarch64 " ================================================ FILE: .travis/tflite/build_tflite_raspbian.sh ================================================ #!/bin/sh set -ex mkdir -p result # build pseudo-rpi official tensorflow, https://www.tensorflow.org/lite/rpi, only works on pi3 docker build -f Dockerfile.tensorflow-official-rpi --tag tensorflow-official-rpi . 
docker run --rm \ -e CC_PREFIX=arm-linux-gnueabihf- \ -v `pwd`/result:/result \ tensorflow-official-rpi \ sh -c " make -j 3 -f tensorflow/lite/tools/make/Makefile TARGET=rpi TARGET_ARCH=armv7l; cp /tensorflow/tensorflow/lite/tools/make/gen/rpi_armv7l/bin/benchmark_model /result/tflite_benchmark_model_official_rpi " # build with rpi tools (works on rpi0, 1 and 2) docker build -f Dockerfile.tensorflow-rpitools --tag tensorflow-rpitools . docker run --rm \ -e CC_PREFIX=arm-linux-gnueabihf- \ -v `pwd`/result:/result \ tensorflow-rpitools \ sh -c " make -j 3 -f tensorflow/lite/tools/make/Makefile TARGET=rpi TARGET_ARCH=armv6; cp /tensorflow/tensorflow/lite/tools/make/gen/rpi_armv6/bin/benchmark_model /result/tflite_benchmark_model_rpitools " ================================================ FILE: .travis/tflite/convert_all.sh ================================================ run_in_tf_docker() { docker run --rm -v $HOME/.cache:/models -it tensorflow/tensorflow:nightly-devel sh -c "$@" } # # inception v3 # run_in_tf_docker "cd /models ; tflite_convert \ # --graph_def_file inception_v3_2016_08_28_frozen.pb \ # --input_arrays input \ # --input_shapes 1,299,299,3 \ # --output_arrays InceptionV3/Predictions/Reshape_1 \ # --output_format tflite \ # --output_file inception_v3_2016_08_28_frozen.tflite" # # # arm ml kws # run_in_tf_docker "cd /models ; tflite_convert \ # --graph_def_file ARM-ML-KWS-CNN-M.pb \ # --input_arrays Mfcc \ # --input_shapes 1,49,10 \ # --output_arrays labels_softmax \ # --output_format tflite \ # --output_file ARM-ML-KWS-CNN-M.tflite" # hey_snips v1 run_in_tf_docker "cd /models ; tflite_convert \ --graph_def_file hey_snips_v1.pb \ --input_arrays inputs \ --input_shapes 80,40 \ --output_arrays logits \ --output_format tflite \ --output_file hey_snips_v1.tflite" # hey_snips v3.1 # (tflite does not support 1D dil) # run_in_tf_docker "cd /models ; tflite_convert \ # --graph_def_file hey_snips_v3.1.pb \ # --input_arrays inputs \ # --input_shapes 40,40 \ # 
--output_arrays logits \ # --output_format tflite \ # --output_file hey_snips_v3.1.tflite" # # # hey_snips v4 model17, 2seconds # (tflite does not support AddN) # run_in_tf_docker "cd /models ; tflite_convert \ # --graph_def_file hey_snips_v4_model17.pb \ # --input_arrays input_node \ # --input_shapes 200,20 \ # --output_arrays output_node \ # --output_format tflite \ # --output_file hey_snips_v4_model17.tflite" ================================================ FILE: .travis/tflite/linux_makefile.inc ================================================ # Settings for Linux. ifeq ($(TARGET), linux) CXXFLAGS += \ -fPIC \ -DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK \ -pthread # TODO(petewarden): In the future we may want to add architecture-specific # flags like -msse4.2 LIBS := -lstdc++ -lpthread -lm -ldl TARGET_ARCH=aarch64 TARGET_TOOLCHAIN_PREFIX := aarch64-linux-gnu- endif ================================================ FILE: .travis/tflite/run_all.sh ================================================ #!/bin/sh ./benchmark_model --graph=inception_v3_2016_08_28_frozen.tflite ================================================ FILE: .travis/tflite.sh ================================================ #!/bin/sh set -ex ROOT=$(dirname $(dirname $(realpath $0))) . 
$ROOT/.travis/ci-system-setup.sh if [ `uname` = "Darwin" ] then brew install coreutils fi if [ -n "$GITHUB_ACTIONS" ] then pip install numpy fi cargo check -p tract-tflite cargo -q test -p test-tflite $CARGO_EXTRA -q ================================================ FILE: .travis/travis.sh ================================================ #!/bin/sh set -ex if [ -z "$PLATFORM" ] then .travis/native.sh else .travis/cross.sh fi ================================================ FILE: .travis.yml ================================================ language: rust dist: trusty cache: # - cargo #- directories: #- $HOME/cached addons: apt: packages: - awscli - jshon - moreutils env: global: - RUST_BACKTRACE=1 - AWS_DEFAULT_REGION=us-east-1 # - CACHEDIR=$HOME/cached - secure: DdQqS4fFDevY/JknS0M6CwW+ooj6ZhKmBk4lFliG3WQgjYietbPMCkiHPEC8Df8U07l54+8G4j+sZJJ4VYQY8WQcuKGWt9/ALjoYHYZ1Dlw0KW0rRJ1BZWLUh8MwpJ5pxHSWfl1a8QqTy/0mI3eJ8iVIDxiZR6b1fGwPYDkNXyqnfOvz31X1aMyoGslNkP7LitObCdBJyzobPlvWafGCQLf7oLbK4a5Ysyc9T607n1B0okco3Te2ztahEOwNoxmHlEFRojM6ZmAmo8LzwkCdFQNjHw+mQ3vScC8gpngi61G9U35luAfweMt30m1UmecVGADeEmwSnJLeAHo5HYKT5n6Q1begxlMGMxezinstTHUX6G8EhEumO/ii3PAscFJ6C+VfciGA7JDS2ICghygKSIqQvVeugNR0glW97lhszLnoXCNY45siknAZVTVqwhgn1ctTQiPWqGVuQp+m9NYIoQAYUpOFNo1mEtEjurHOk96Q0XjRJMfUSLOB5KfPakk/ghEY1ZYDDB9wi134f3Z5hLw1FGj/Uiw9LcnEIcORV2o8fbFrb2IgGsKQbRtdPEJ92q5bdeA00TbSrzoDDRFGbwBg+8ibFMF8O2J3Q54sUu6LmkP7qjtMIT2vB09M7LWQYtW3Vd6ovHwI6v+tNJK2D4cJA0KOSwzpOgIBhEubrZI= - secure: 
su5wLbN7lr59HSKGM8M9pW9VSFMtdTBbQ1oBZFTvw+RNoNew3BBMh8XymbjEv6R+yyGmDAa8Dw6E7HPiBtI5O4q89rHxl7MO5keZINjpXDzTydZ5MQ2juZPIRQpLfpl6AN1meG8I3SoOrTMDgGqfPh2rqEjIS6cBZbo8Re/0KSBuJB781qdT6x80Qfpy87sDu9GTRM8ueXsV+jRw0Yek709m29MSB8pkFAP9OitKHxzF5OFnxVPV2becj53racOe7q9ZE5QWmWnzPZUxflVyrtzDsN0J/C4g0SoEbxLFg1OLbffO1GVJ/Iv6ZeaggzYRvCYwSSANyfqorqSUDT5NPwQlUXjgBPHlOXbCfY2s5hFzQQ3z4R86fyzfdfBQ56uXTXkB4CWpn/JieUoviuDO0/YNaI0KU6hOrOn68BZrBSqwHcxwhtcP3cw/EXfR1aiok7OoDPAWnR4f3Lu/+fmkW+VUEg5Ufh/GgjZt7XwsNBfo+pmvO3mO/5okxa2/HbOwcoTpELAzPMKyh1xn5gjrk5bWcZofeGtFMoXN4+G8+1qlQ/sLp3144QHyRf00n4qlhA6xZwplpBWN12haXyKRx67lPTzE5QuT1BRoyCdRvbjQiOdo61xGvoOK9J8PL9C06xtnKQ+6iDnjFaWielASoENvcNL+DCKqiecpUb5hoR4= - secure: HZHubUhHLYh+v5yuyMy5TmfleHqAdcmVZd6hZf7c2sXQLsrcjoLGPxu4jzS9OJt27Sfp6xXVFeeA7SFDMobxe8AFm3+SRCbzvT6mu8/LlsuTsQO3jYQtt8j5OIhtLQ7yfDyOscXwy2I3SgluhVQ0HCIi6ShQ8YRD1vE119Qku2x/sWlKcZQckcl5T/yyig08sXfOM+IfFQPIW1gvMWM3dv6RigiCiy4qjfeQy8v8xbvbRayXeT4Vpfv9CqN79kAQ03r7MSmBBn6i88mGoQEzVDMEZPZq8rMNqn8qyIn8LxjXaCpUz0fTCYJrHSmzKyCE7+k7IEidlkyT6hJpvpXCfNYBSk2fB7SDxlm98ELVgqPBq6vjYoPaqsqs7Cz+pTTQYOCnCKvRDYhccqhAsgjNVKDRIJc0H7cT5sv4TuJxsMp/vYVh7RxoFem2r37ns4pu8XPP9RVsmoAVYzlHu8Fnd8TWY30MXACf3t43ceaPfor0IARrZcjQR1lt7eMJGQDkduJRxzq0cBB1djP8HfZGab/I0cVFEXGeJWDQfaHZ8Pq/M3+bBPLD9XLqKmpoNbW+gqQQl49/w01/EJrp9QhK/Og6ujfkeA1OCziPCUDLAHYvmwaYZYzV9z5VSPfUYwiiJzva92/ywWfhCmz3SpXPbq8cTPoDWzeBUeqcw8iIWVU= - secure: 
vH8bS7RVgaHLGZUeqtViCQYDJfhubMiCMETLPD959pv9sODmSfjOhYtFgZtbn0wZ2fhCQFgKhYKUJdti5Vo9OUlyBiUsfLPilAAaeZu0Y2SQIKpbuNU9kJibuzyj6KZoRhjvsifhO5/mB03W7CpzjSGvJntK62BM0b6CrDtUlHlOgjwd3U1c5brZS9VWfnkh8pohgneB/XYtefTJXHuGjJgf75uw2TO/ZKQmmaKJWPoMVN76cgarRmXS/SoGMLr0+7ArnvIMNW9QRMABrSzUgP0RBvNfndwjiIQDZpIefIz/UVTa5e/pS79CLoQKM9FSWZANf3ZJgz0SzYgMprSe9f3RZGu5i0BLQA0SzdxCjCra5/3/pz+p86/iWGHnBfV6pvH9c2W1OUCCTiohNk7bgUfXxVrxk2RHxhc375MFiCxu6KtPRW8kJoRTSZP+k2itaBPUSevV0cdWrVlRjnTwoCskxMIFQH+vStxQjUXV0/g9SZzwdIR/j1aKIjb6VdQS2WOh0+BKgHy0jy2w4GJHtuObIg0aTcQAtt44aK0T/VeHJ0f1FxfjzPxrcqcgSxvi2E4HgedSCvtOHPWs5YYYGt76yH0G5ZOMOF8xP2CRStlcNB0TtLdpcUvQT2ejK7t4sCOj8Kz81s2cbLCZnJdkFaaBsffV7JtbjexXRwohGxI= matrix: include: - rust: stable - rust: stable env: PLATFORM=raspbian - rust: 1.35.0 - rust: stable script: .travis/tf.sh - rust: stable script: .travis/debug-tests.sh - rust: stable os: osx osx_image: xcode9.3 env: PARTIAL_CI=true - rust: stable env: PLATFORM=aarch64-unknown-linux-gnu - rust: stable env: - PLATFORM=armv6vfp-unknown-linux-gnueabihf - TRACT_CPU_EXPECT_ARM32_NEON=false - rust: stable env: - PLATFORM=armv7-unknown-linux-gnueabihf - TRACT_CPU_ARM32_NEON=true - TRACT_CPU_EXPECT_ARM32_NEON=true - rust: stable env: PLATFORM=aarch64-linux-android - rust: stable env: PLATFORM=armv7-linux-androideabi - rust: stable env: PLATFORM=i686-linux-android - rust: stable env: PLATFORM=x86_64-linux-android - rust: stable os: osx osx_image: xcode9.3 env: PLATFORM=aarch64-apple-ios - rust: beta - rust: nightly - rust: stable os: windows script: cargo test -p tract-linalg allow_failures: - rust: nightly - os: windows script: ".travis/travis.sh" ================================================ FILE: .vim/coc-settings.json ================================================ { "rust-analyzer.imports.granularity.group": "module" } ================================================ FILE: CHANGELOG.md ================================================ # Unreleased * [Breaking][MSRV] MSRV bumped to 1.91.0 (for 
`const TypeId::of`). # 0.23.0-dev.3 — 2026-03-20 ### Breaking changes - **`Value` renamed to `Tensor`** across the entire public API surface (Rust, C, Python). The deprecated `Value` alias has been removed. - **Crate renamed: `tract-rs` → `tract`** — update your `Cargo.toml` dependency accordingly. The CLI binary is now `tract-cli` (previously `tract`). - **`into_tract()` renamed to `into_model()`** in all API layers. - **`DatumType` variant names shortened** — the `TRACT_DATUM_TYPE_` prefix is dropped (C API). - **Deprecated state methods removed**: `init_states()`, `state_initializers`, and the `n_states` parameter are gone from `State` trait and `RunTensors`. - **Python**: `concretize_symbols` and `pulse` methods replaced by typed transform classes; `TransformSpec` is now an abstract base class. ### Improvements - **`UnfoldKeyValueCacheTransform`** — explicit KV-cache I/O mode now available as a first-class transform (CLI: `--transform unfold-kv-cache`). - **Structured `NodeFilter`** for `FloatPrecisionTranslator` — replaces raw filter strings. - Python docs migrated from mkdocs to **Sphinx** (hosted on GitHub Pages with version switcher). - New GPU inference section (CUDA example, `Runtime` usage). # 0.23.0-dev.2 - 2026-02-18 * This is a pre-release. It will be a pretty big one, here are some highlights. * New public api: tract-rs should be the main point of entry for any new project. A caveat: it does support most of tract's simple uses as is, but some specialized sections like state management and model transforms are not satisfactory yet, so the real 0.23.0 will presumably break these again. The plan is that this facade will be tract's public API, and that it will be the only surface under semver rules. Up to now, it was essentially everything `pub`, which boils down to mostly "everything". * GPU: The new API puts forward the Runtime trait.
It allows running models on GPU (runtimes "cuda" and "metal" offer some support, while "default" is the CPU runtime). See https://github.com/sonos/tract/blob/main/examples/nemo-parakeet-asr/src/main.rs#L21 for an example. Instead of calling into_runnable(), use Runtime::prepare(). * Additionally, the internal API compatibility is broken in many places (e.g. RunnableModel always takes the model as an Arc while it was accepting AsRef before). As you're going to need to fix code to upgrade, it is recommended to try and use the new "tract-rs" facade (please tell us if the current API coverage is not enough, or awkward to use). * [Breaking][MSRV] MSRV bumped to 1.89.0 * [linalg] Avoid panic in Apple sysctl-based feature probing (AMX detection). # 0.22.1 - 2026-02-23 * [backport] Small bug fixes release (Slice decluttering bug) # 0.22.0 - 2025-08-25 * [Breaking][MSRV] MSRV bumped to 1.85.0 * port to edition 2024 * bump virtually each and every dependency * (wip, experimental) cuda support for llm # 0.21.14 - 2026-02-23 * [backport] Small bug fixes release (Slice decluttering bug) # 0.21.12 - 2025-04-10 * multithread matmul is feature gated now ("multithread-mm" on linalg) * full hand made arm64 f32-accumulator matmul kit * more auditing improvement around einsum and its matmul translations * bugfix in matmul translation and gather * more test-rt-level coverage of low-level matmuls (metal and cpu) * memory arena improvements (metal) * q40 for convolution weights # 0.21.11 - 2025-03-19 * [cli] augment audit capabilities for mm implementation choices * revisit matmul kernel selection * improve gather with compressed inputs * revisit slice bubbling up to unlock optimisations * fix a bug around flipping subtractions * support for left q40 input in arm64 f32 accumulating kernels (unlocks q40f32 compression on arm64) # 0.21.10 - 2025-02-21 * WIP llm testability (--approx-custom) * [metal] ggml-ported kernels * WIP einsum-to-matmul testability * optimisation around reduce
impacting some modern/exotic normalisation layers * WIP towards better handling of shared weights (e.g. embeddings duplication) # 0.21.9 - 2025-01-08 * [metal] experimental profile * [cpu] new versatile (mmm/mmmv) kernels combinations for various architectures * [metal] scaled-masked-softmax detector and impl # 0.21.8 - 2024-12-05 * [linalg, compression] introduce mmm kits * [linalg] (wip) rework f16 on non-f16 machines * [linalg] element-wise binary operators optimisation * [core, compression] gather with compressed weights * [metal] new kernels * [metal] new memory management * [nnef] opt-in deterministic tar encoding # 0.21.7 - 2024-09-23 * [metal] (experimental) introduce partial support for Apple Metal * [core] Potential internal API breaking changes (operator names, comparison ops refactored) * [data] (experimental) Smarter TDim simplification, handling of Min and Max. TDim assertions for simplifications. * [data] (experimental) WIP around multiple scenarios (modes) for LLM inference * Extra examples * [linalg] (experimental) kernels targetting LLM block-quantized tasks (inc. 
intel 32x1 q40f32) # 0.21.6 - 2024-07-24 * [data] Rework tdim and symbols, introduce inequalities assertions, min and max operators * [data] Generalize Blob usage in Tensor * [linalg] Rework reduce implementation, introduce more generic binary ops support (wip) * [linalg] Introduce multithreaded matrix multiplication runner * [linalg] Introduce Q4_0 block quantization for weights (wip) * [linalg] Introduce AMX f16 kernels, Neon Q40F16 kernel (experimental) * [linalg] wasm f32 4x4 kernel * [core] Introduce Opaque and OpaqueFact to escape Tensor and TValue formalism * [core] generalize/improve float precision translator, with translation filter * [core] Introduce garbage collecting in patch application, new compact algo, and rework constant propagation to spare memory * [core] Rework packed format and packing metadata * [linalg/core] Introduce multiple packing format for matmul kernels * [core] Work In Progress refactoring binary, towards more optimized execution strategies * [nnef] inequalities assertions extension, q4_0 extension * [tflite] plug in tanh and sigmoid # 0.21.5 - 2024-05-11 * [TFLite] fixes for fully connected and max pool layers * Allow opting out of new memory friendly execution order optimisation # 0.21.4 - 2024-04-23 * More memory/cache friendly execution order * Several fixes around symbolic dimensions computation (some should help with attention models) # 0.21.3 - 2024-04-03 * [AMX] Put AMX for iOS behind a feature gate ("tract-linalg/apple-amx-ios"). 
# 0.21.2 - 2024-03-29 (yanked) * [ONNX] Support for external storage of tensors with offset and length * [ONNX] Lots of fixes around binary quantized operators (add, mul, etc) * [PY] Fix python source distribution * [AMX] Activate AMX on iOS * [API] Introduce transforms in external api * [BLAS] Introduce a simple BLAS transform for Matrix multiplication * [F16] Introduce a Reduce that solves many L2 normalization errors in f16 This version has been yanked to revert systematic activation of AMX on iOS. AMX is a private API and Apple may reject an App that performs AMX instructions. # 0.21.1 - 2024-02-08 * [ONNX] Support for external storage of tensors with offset and length # 0.21.0 - 2024-01-16 * MSRV is now 1.75.0 * [internal] ConvUnary and MatmulUnary are replaced by binary, potentially dynamic equivalent # 0.20.22 - 2023-11-28 * [ONNX] LayerNormalization support # 0.20.21 - 2023-10-31 * [ONNX] ignoring output shapes is now the default * # 0.20.18 - 2023-08-30 * [intel] fix in AVX512F matrix vector product * [tflite] alpha, embryonic support. some convolutional models working. * [kaldi] remove abandoned kaldi experimental support * [refactoring] Runtime abstraction and runtime-targeting tests * [refactoring] Refactoring Python and C API around a possible tract-api. Introducing dylib support.
* [pytorch compat] fixes around node names starting with / (bug triggered by recent pytorch versions) 0.20.7 to 0.20.17 are misfires # 0.20.6 - 2023-06-09 * Bug fixes, fix display of If operator # 0.20.5 - 2023-05-26 * Various bugfixes around Einsum * Einsum now has functions to translate to MatMul and other axes manipulations # 0.20.0, 0.20.1, 0.20.2, 0.20.3 - 2023-04-25 * [optim] 32x32 f32 AMX kernel (for Apple Silicon M family) * [optim] bunch of AVX512F kernels (square, skinny, vector) * [ONNX] introduce Trilu, TopK * [NNEF/OPL] submodel loader * [ONNX] support alternative layout for LSTM (layout=1, batch becomes first axis) * [ONNX] If operators with dynamic condition (very basic optimisations, no nnef support yet). # 0.19.9 & 0.19.10 - 2023-04-17 * HardSwish ONNX, tract_core_hard_swish in NNEF/OPL * introducing tract_core_submodel in NNEF/OPL * JSON resource loader in NNEF/OPL * Profiling API tweaks * `--folded` view for model command line dump (hide Scan loops) # 0.19.8 - 2023-03-27 * Various bug fixes # 0.19.7 & 0.19.6 - 2023-02-24 * more bug fixes * wip on python doc auto-deploy # 0.19.5 - 2023-02-22 * 0.19.3 and 0.19.4 are release misfires * lots of bugfixes following 0.19 big changes * introducing the JSON NNEF resource # 0.19.2 - 2023-01-30 * [NNEF/OPL] introduce json resource loader * extend Complex number support (under a feature flag) # 0.19.1 - 2023-01-23 * [nnef] new identifier syntax is now opt-in for serialization (both accepted at loading) * alpha-level C interface. how and where to deploy it (where to put the .h, whether or not to build and ship dylibs) * alpha-level python interface. deployed on pypi as "tract". At this stage, API is undocumented and may still change significantly. # 0.19.0 - 2023-01-11 * [BREAKING] TValue are now used in run() instead of the previous mix of Tensor and Arc * internal API breaking changes: no more op_families, libcli split away. State is no longer Send (but can be "frozen" to a Send counterpart).
* Symbols can now be String instead of char. They are not shared globally anymore, but scoped in the Model instead. * [pulse] S symbol is no longer magic. The time dimension symbol must be provided at pulsification time. * [pulse] In most cases, we can now pulsify without an explicit pulse len (pulse len can be expression). * [cli] deprecated "x" syntax for shape is removed * [nnef/opl] new i"..." syntax for escaping identifiers: i"some arbitrary string". Allow serialization of any ONNX model with any kind of string as node names. * [ONNX] Signal processing operators (DFT, STFT, MelWeightMatrix, BlackmanWindow, HammingWindow, HannWindow) * [ONNX] bitwise operations * [ONNX] Compatibility target raised to operator set 18 # 0.18.3 - 2022-10-27 * [NNEF] Introduce a "resource" extension for loading values from a separate source (as a config file) * Workaround for cpu detection failure on FreeBSD / arm64 * Various bug fixes # 0.18.2 - 2022-10-18 * [pulse] improve convolution (and others) pulsification to avoid some unnecessary buffering delay * [cli] support multiple streaming inputs and outputs * [ONNX] more relaxed Clip operator rules # 0.18.1 - 2022-10-06 * prepare NNEF for further tract-opl extension (resource support) * more generic matmul * optimise some EinSum cases as matmul # 0.18.0 - 2022-09-21 * [ONNX Breaking] Several changes to move towards supporting ONNX symbolic dimensions (actual fixes, but they may break stuff that was working more or less by accident). It may be required to erase output shapes explicitly when input shape is overridden on models that were working before. * [CLI breaking] ONNX symbolic dimensions have some impact here too. --input-bundle is deprecated, it was overridden and ambiguous. Instead, there is a --input-facts-from-bundle global option, and a --input-from-bundle option in the subcommands run, profile, dump. --allow-random-input is also moved to subcommands. We think all previously supported behaviours are still there.
Please open issues if not. # 0.17.7 - 2022-09-05 * clippy up all tract code * various fixes * 0.17.5 and 0.17.6 are misfires # 0.17.4 - 2022-08-11 * [cli] global --set (as a somewhat cleaner --concretize successor) allows setting a symbol value after decluttering * [cli] run --save-outputs output.npz to save execution outputs * dozens of fixes and code cleanup (clippy-fication in progress) # 0.17.3 - 2022-07-25 * [License] Allowing https://spdx.org/licenses/Unicode-DFS-2016.html (no tldr yet, but pretty similar to BSD-2) * [Breaking] CLI --json option reports costs as strings instead of numbers (in order to allow symbolic values). * Sigmoid/Tanh f32 reimpl, plus new f16 impl. # 0.17.1 - 2022-07-11 * Sanitiser=address in the CI. Fixed a couple of overflow/memleaks. (Nothing looked too awful.) * ONNX NonMaxSuppression # 0.17.0 - 2022-06-13 * [Breaking] [ONNX-ML] TreeEnsembleClassifier with binary output (single class) now mimics scikit-learn output layout. # 0.16.9 - 2022-06-10 * bump ONNX protobuf file and support external tensors format * new "skinny" kernels for avx2/fma f32 multiplication (positive impact on low, non 1 batch size for DNN-heavy loads) # 0.16.7 - 2022-05-16 * Softmax is now an operator in core, coming with a direct quantized implementation * new TypedFact constructor API ( f32::fact(&[1, 4, 12]), f32::fact(shape!(Symbol::from('N'), 12))) * fixes and optimisation of re-quantization pipeline * fixes around symbols in NNEF/OPL # 0.16.6 - 2022-05-03 * Various changes around quantization support (qi32 appearance) # 0.16.5 - 2022-04-27 * Intel optimisations are back * Range is now more flexible, should unlock some BERT models with symbolic dimensions. # 0.16.4 - 2022-04-14 * some optimisations in depthwise convolutions * various bugfixes * [Breaking] Fixed nnef "tile" operator definition ("repeats" is plural). As a consequence models using "tile" serialized with tract with prior versions can not be loaded anymore (and vice-versa).
# 0.16.3 - 2022-03-30 * [Breaking] tract-opl models Scan syntax changed a bit. Models exported by <0.16.2 are loadable in >=0.16.2, but not the other way around. * Optimisation in deconv # 0.16.1 - 2022-03-02 * [Breaking] Minimum Rust Supported Version is now 1.59.0 * [Breaking] Small API changes in model api: .compact(), .optimize(), .declutter() now take &mut self and work in place. * [LICENSE] Only the licensing for dependencies of the top-level library crates (tensorflow, onnx, kaldi, pulse) will now be monitored. The command line tool (tract crate in cli folder) is for developpers (tract developpers or tract integrators), is not meant to be shipped to end-user, and it concentrates most of the license and dependency complexity. * [LICENSE] BSD-3-Clause is now accepted in tract. * Optimisations around convolutions and deconvolution * Optimisation on Cortex-A53, first round of Cortex-A55 optimisation too. # 0.15.8 - 2021-11-18 * Fix brand new ArrayFeatureExtractor inference # 0.15.7 - 2021-11-16 * ONNX ArrayFeatureExtractor * ConvTranspose/deconv optimisation # 0.15.6 - yanked * just a release script failure # 0.15.5 - 2021-10-26 * hold half at 1.7.x for compat with rust 1.50 # 0.15.4 - 2021-10-21 * ConvTranspose/deconv pulse support * ONNX SpaceToDepth/DepthToSpace # 0.15.3 - 2021-07-29 * optimise i8*u8, u8*i8 and u8*u8 matrix products (and convo) # 0.15.2 - 2021-07-09 * bump prost dep # 0.15.1 - 2021-07-08 * some optimisations for arm32 (cortex-a7 and a9) # 0.15.0 - 2021-06-24 * Switched the order of item_type and item_type_vendor in the NNEF tendor format to be consistent with NNEF-tools, and changed the item_type of integers due to an error in the specification. Breaking for tensor files containing integers or strings. 
* Scan output batching optimisation * Concat pulsification over a secondary axis * new aarch64 16x4 f32 kernel ## 0.14.2 - 2021-05-27 * better handling of errors in ONNX parser * fix/workaround some performance regressions bubling from recent ndarray changes ## 0.14.1 - 2021-05-18 * ONNX ConvTranspose, Gather, GatherND, GatherElements, Scatter, ScatterND, ScatterElements support (and NNEF deconv) * Fixes around integer serialisation in NNEF * workaround subtle breaking changes in ndarray (between 0.15.1 and 0.15.2) ## 0.14.0 - 2021-04-19 * low-level functions in linalg are now version tagged: two versions of tract can now co-exist in the same binary * rustc minimal version is now 1.50 * dependencies version bumps (ndarray, itertools, and others) ## 0.13.2 * fix sigmoid and tanh variability on intel ## 0.13.1 * temporary disable binary unicast add fusing (too many bugs) ## 0.13.0 * Release are now "in sync": all tract crate versions on a build *must* be aligned * optimisations, with a focus on aarch64 ## 0.12.5 - 2021-01-12 * Dependency bumps ## 0.12.4 - 2021-01-06 * 0.12.3 is a misfire * hotfixes on 0.12.2 new tree classifier * fix X compilation from macos/aarch64 to macos/intel ## 0.12.2 - 2021-01-05 * ONNX-ML: CategoryMapper and TreeEnsembleClassifier (partial, SoftmaxZero and Probits are missing). With NNEF support. * cargo-deny enforces licences choices ## 0.12.1 - 2020-12-11 * 0.12.0 is a misfire. * API BREAKING: TypedFact::dt_shape & friends can not fail anymore, no longer return a result (remove `?`) * Breaking: Rust minimal version bumped to 1.42 * Early, basic, correct but slow support for i8 by u8 matrix mult. 
* Support for Apple Silicon, aka M1, aka aarch64 darwin (but not in CI yet) * dynamic quantization convolution support * release now ships cli musl builds for linux * optimizations targetting small Cortex-A (like 7, 8, and 9) * command line dump --profile --cost now computes flops * ONNX: OneHot op support ## 0.11.2 - 2020-10-26 * ONNX: new op: DynamicQuantizeLinear * tract-data crate split from core, containing tensor, dim, and datum types. ## 0.11.1 - 2020-10-20 * switch from error_chain to anyhow * simplify trivial gathers to a slice * generalize symbolic dimension a bit: support "2S" and the like * deprecate "x" syntax in CLI, please use `,` instead ## 0.11.0 ### Breaking * NNEF: tract-nnef no longer performs gunziping, but expect an uncompressed tar stream. We found out is it counter-productive (weights matrices are more or less random, they do not compress easily, and decompression is expensive). NNEF networks in the wild are .tgz file. Using flate2, decompression is a one-liner, but it must be done by the client code now. 
* bumped extended nnef compat version (unchecked at this stage) to "alpha1" * move pulse operators and translation to their own crate and nnef registry * generalize TDim to support an arbitrary number of symbols * concretize_stream_dim is superseded by concretize_dims ### Notable * new crates, building on tract-opl introduction: * *tract-pulse-opl*: pulse runtime (handful of ops, including Delay) is now separated from core * *tract-onnx-opl*: onnx runtime (4 ops not belonging in core) * *tract-pulse*: pulsification of models (model-translation time) * tract-onnx is now limited to onnx model loading and conversion ## 0.10.10 - 2020-08-30 * load a NNEF as a TypedModel using tract_nnef, and from the CLI * dump a tract TypedModel to NNEF (with extensions for op not nnef compatible) * not a full coverage of nnef, but enough for most CNN (image categorizers zoo working) * 80% of onnx tests are surviving a NNEF dump and reload at this stage ## 0.10.0 - 2020-07-28 ### ONNX * covered operators compatible with Operator Sets 9, 10, 11 (new) and 12 (new) ### API Breaking * Tensor::l1 method is gone ### Windows * Support for -gnu targets (non-msvc). ### Notable * --cost now gives the number of parameters in the model * SimpleState is clonable again (actually useful !) ## 0.9.2 - 2020-06-16 * introduce `TypedModel::method.concretize_stream_dim` * various pulsification bugfixes ## 0.9.1 - 2020-06-16 * fix Reshape with TDim ## 0.9.0 - 2020-06-15 Still no shortage of version numbers... ### API Breakage * NormalizedModel (and friends) are gone. They were only useful as a pre-pulse transformation pre-requisite that the current TypedModel (& co) meets. * TypedModel::into_optimized() is gone. InferenceModel::into_optimized() stays as an end-to-end shortcut for simple cases. It does .into_typed()?.declutter()?.optimize()). * TypedModel::codegen() is now ::optimize() ## 0.8.0 - 2020-06-13 I wish I had seen these issues yesterday. Anyway, version numbers are cheap.
* Bumping minimum rust to 1.41 ## 0.7.0 - 2020-06-12 * CLI refactoring (hopefully stabilizing a bit?) * `profile --bench` is now bench * profile is now `dump --profile` * cost is now `dump --cost` * profiling is now done during a full net instead of per op * new "compact" graph dumper, profile visual hints * `dump --cost --profile --json` output profiling and cost information * show logical names for ops instead of the Op struct names (not 100% sure it's right) * criterion integration * WASM support for tract-onnx and tract-tensorflow targets (CI) * Convenience methods added to Models to allow model building in fluent style, up to Plan instantiation (SimplePlan now nicknamed RunnableModel). Non breaking. * Support for ONNX bidi LSTM (CI), GRU and RNN (untested, consider alpha) * Fixes around nets with a non trivial batch size (axis simplification code, matmul op fusion) ## 0.6.3 - 2020-04-25 * Lock ndarray version to dodge rustc/llvm issue (https://github.com/rust-lang/rust/issues/71506) ## 0.6.2 - 2020-04-15 * Use http://gihub.com/kali/readings for instrumentation. ## 0.6.0 - 2020-02-19 ### Notable * New jupyter/keras/tf example * ARMv8 tanh / sigmoid optimisation ### API Breaking * refactor exports and dependencies * preferred way to use tract is now to `use tract_tensorflow::prelude::*;` * singleton framework is built by `let tensorflow = tensorflow()`. The Framework trait is in the prelude too. 
* the prelude contains a reexport of `tract_core`, and of ndarray as `tract_ndarray` * no more need to declare dependency on `tract-core` and/or `tract-linalg` in Cargo.toml * same goes for `tract_onnx` ## 0.5.9 - 2020-02-07 ### Breaking * Rustc minimum version is now 1.39 ### Onnx * Support for MatMulInteger, ConvInteger * Support for QuantizeLinear DequantizeLinear * Basic support for QLinearMatMul, QLinearConv ## 0.5.6 - 2019-10-30 ### Tensorflow * Initial support for GatherV2 ### Onnx * Fix PReLu normalization ## 0.5.5 - 2019-10-25 ### Tensorflow * Initial support for AddV2, Mean, Min, Prod, Sum ## 0.5.4 - 2019-09-30 ### Notable * Make Onnx loader operator set aware, and Slice-10 support. * Cost now reports Delay ops buffer size * Bump dependencies (protobuf) and fix codegen * Windows CI now performs a top-level "cargo check" ## 0.5.1 - 2019-09-24 ### Bugfix * remove the no_panic checks, as too fragile (breaking non-lto builds) ## 0.5.0 - 2019-09-20 ### Breaking * Change tensor facts names for consistency: TensorFact is now InferenceFact. ### Notable * Introduce Windows support, including CI coverage for linalg * Switch from Travis to GitHub Actions * Internal refactoring around tract-core canonic opset * Tract CLI can now compute a FLOP number for networks ("cost" subcommand). Furthermore the CI asserts its value for a few networks to prevent optimisation regressions. * Fix: handling of -1 in ONNX Reshape op ## 0.4.2 - 2019-09-10 * Fix release script after 0.4.1 release disaster. ## 0.4.1 - 2019-09-09 [YANKED] * Fix for OS where CARGO_CFG_TARGET_FAMILY is undefined * Linear Algebra package refactor * tract-core canonic operator set introduction * significant performance boost (up to 20% on some real-life networks) ## 0.4.0 - 2019-07-30 * Start Kaldi networks support (LSTM, Renorm, Affine, downsample) ## Before... This Changelog started way too late. But better late than never. 
================================================ FILE: Cargo.toml ================================================ [workspace] resolver = "2" members = [ "data", "linalg", "core", "pulse", "pulse-opl", "hir", "nnef", "nnef/cli", "nnef/nnef-resources", "tensorflow", "tflite", "onnx-opl", "onnx", "libcli", "cli", "gpu", "metal", "extra", "transformers", "cuda", "api", "api/rs", "api/ffi", "api/proxy", "api/proxy/sys", "examples/face_detection_yolov8onnx_example", "examples/face_similarity_arcface_onnx", "examples/tensorflow-mobilenet-v2", "examples/tflite-mobilenet-v3", "examples/keras-tract-tf2", "examples/nemo-parakeet-asr", "examples/nemo-nemotron-asr", "examples/nnef-dump-mobilenet-v2", "examples/nnef-mobilenet-v2", "examples/nnef-mobilenet-v2-api", "examples/onnx-mobilenet-v2", "examples/pytorch-albert-v2", "examples/pytorch-resnet", "examples/causal_llm", "examples/stable-diffusion", "examples/stable-diffusion-3", "examples/stable-diffusion-xl", "harness/core-proptest-pulse", "harness/nnef-inceptionv3", "harness/tf-inceptionv3", "harness/tf-mobilenet-v2", "harness/tfl-mobilenet-v2-q", "test-rt/infra", "test-rt/suite-unit", "test-rt/suite-onnx", "test-rt/test-f16", "test-rt/test-blas", "test-rt/test-metal", "test-rt/test-cuda", "test-rt/test-unit-core", "test-rt/test-onnx-core", "test-rt/test-nnef-cycle", "test-rt/test-tflite" ] # same, without metal, test-metal, cuda, test-cuda and test-tflite which are probelematic on specific targets default-members = [ "data", "linalg", "core", "pulse", "pulse-opl", "hir", "nnef", "nnef/cli", "nnef/nnef-resources", "tensorflow", "tflite", "onnx-opl", "onnx", "libcli", "cli", "extra", "transformers", "api", "api/rs", "api/ffi", "examples/face_detection_yolov8onnx_example", "examples/face_similarity_arcface_onnx", "examples/tensorflow-mobilenet-v2", "examples/tflite-mobilenet-v3", "examples/keras-tract-tf2", "examples/nnef-dump-mobilenet-v2", "examples/nnef-mobilenet-v2", "examples/onnx-mobilenet-v2", 
"examples/pytorch-albert-v2", "examples/pytorch-resnet", "harness/core-proptest-pulse", "harness/nnef-inceptionv3", "harness/tf-inceptionv3", "harness/tf-mobilenet-v2", "harness/tfl-mobilenet-v2-q", "test-rt/infra", "test-rt/suite-unit", "test-rt/suite-onnx", "test-rt/test-f16", "test-rt/test-blas", "test-rt/test-unit-core", "test-rt/test-onnx-core", "test-rt/test-nnef-cycle", ] [workspace.package] rust-version = "1.91" [workspace.dependencies] accelerate-src = "0.3" anstyle = "1.0.2" anstyle-parse = "1.0.0" anstyle-query = "1.0.0" anyhow = "1.0.43" anymap3 = "1.0" approx = "0.5" atty = "0.2.14" bit-set = "0.10.0" boow = "0.1.3" box_drawing = "0.1.2" byteorder = "1.4.3" bytes = "1.0.1" cc = "1.0.69" clap = { version = "4", features = [ "cargo", "derive" ] } colorous = "1.0.5" core_affinity = "0.8.0" criterion = "0.8" cudarc = { version = "0.19", features = ["dynamic-loading", "f16", "cudnn"] } derive-new = "0.7" dinghy-test = "0.8" dirs = "6.0.0" downcast-rs = "2.0" dyn-clone = "1.0.4" dyn-eq = "0.1" dyn-hash = "1.0" env_logger = "0.11" erased-serde = "0.4" flatbuffers = "25.12.19" flate2 = "1.0.20" float-ord = "0.3.2" fs-err = "3" fs2 = "0.4.3" getrandom = "0.4" half = { version=">=2.4,<3.0", features = [ "std", "num-traits" ] } home = "=0.5.12" icu_normalizer = "2.1" icu_normalizer_data = "2.1" icu_properties = "2.1" icu_properties_data = "2.1" idna_adapter = "1.2.0" image = "0.25" inventory = "0.3.21" itertools = "0.14" lazy_static = "1.5.0" libc = "0.2.164" libloading = "0.9" libm = "0.2.11" liquid = "0.26.8" liquid-core = "0.26.8" liquid-derive = "0.26.8" litemap = "0.8" log = "0.4.14" maplit = "1.0.2" memmap2 = "0.9" metal = { version = "0.33.0" } ndarray = "0.17" ndarray-npy = { version = "0.10", features = [ "compressed_npz" ] } nom = "8.0.0" nom-language = "0.1" nu-ansi-term = "0.50" num-complex = "0.4.0" num-integer = "0.1.44" num-traits = "0.2.14" num_cpus = "1" openblas-src = { version = "0.10", features = ["static"] } pastey = "0.2" proptest = "1.0.0" 
prost = "0.14" prost-types = "0.14" py_literal = "0.4.0" rand = "0.10" rand_distr = "0.6" rayon = "1.10" readings-probe = "0.1.8" regex = "1.5.4" ron = "0.12" reqwest = { version = "0.13", features = [ "blocking", "rustls-no-provider" ], default-features = false } rustfft = { version = "6.1", features = [ "neon" ] } rustls = { version = "0.23", default-features = false, features = [ "ring", "std", "tls12" ] } webpki-roots = "1" safetensors = "0.7" scan_fmt = "0.2.6" serde = { version = "1.0.127", features = [ "derive" ] } serde_json = "1.0" simd-adler32 = { version = "0.3.7", features = ["std"] } smallvec = "1.6.1" string-interner = "0.19" tar = "0.4.37" tempfile = "3.8" tensorflow = "0.21.0" tflitec = { git = "https://github.com/kali/tflitec-rs.git", rev="9ceb838" } time = "0.3.23" tokenizers = "0.22" unicode-normalization = "0.1.19" walkdir = "2.3.2" zerofrom = "0.1.5" tract-api = { version = "0.23.0-pre", path = 'api' } tract-core = { version = "0.23.0-pre", path = 'core' } tract-cuda = { version = "0.23.0-pre", path = 'cuda' } tract-data = { version = "0.23.0-pre", path = 'data' } tract-extra = { version = "0.23.0-pre", path = 'extra' } tract-gpu = { version = "0.23.0-pre", path = 'gpu' } tract-hir = { version = "0.23.0-pre", path = 'hir' } tract-libcli = { version = "0.23.0-pre", path = 'libcli' } tract-linalg = { version = "0.23.0-pre", path = 'linalg' } tract-metal = { version = "0.23.0-pre", path = 'metal' } tract-nnef-resources = { version = "0.23.0-pre", path = 'nnef/nnef-resources' } tract-nnef = { version = "0.23.0-pre", path = 'nnef' } tract-onnx-opl = { version = "0.23.0-pre", path = 'onnx-opl' } tract-onnx = { version = "0.23.0-pre", path = 'onnx' } tract-pulse-opl = { version = "0.23.0-pre", path = 'pulse-opl' } tract-pulse = { version = "0.23.0-pre", path = 'pulse' } tract-tensorflow = { version = "0.23.0-pre", path = 'tensorflow' } tract-tflite = { version = "0.23.0-pre", path = 'tflite' } tract-transformers = { version = "0.23.0-pre", path = 
'transformers' } tract = { version = "0.23.0-pre", path = 'api/rs' } tract-proxy-sys = { version = "0.23.0-pre", path = 'api/proxy/sys' } tract-cli = { version = "0.23.0-pre", path = 'cli' } tract-ffi = { version = "0.23.0-pre" } tract-proxy = { version = "0.23.0-pre" } [profile.opt-no-lto] inherits="release" lto=false [profile.release] # debug = true lto = true [profile.bench] debug = true [profile.dev.package."*"] opt-level = 2 [profile.dev.build-override] debug = false # strip = "debuginfo" does not work on android and ios incremental = false ================================================ FILE: LICENSE ================================================ ## License Licensed under either of * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. ### Contribution Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. ================================================ FILE: LICENSE-APACHE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: LICENSE-MIT ================================================ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ ![tract-logo](assets/tract-logo/PNG/tract-horizontal-blue.png) ![Rust](https://img.shields.io/badge/rust-%23000000.svg?style=for-the-badge&logo=rust&logoColor=white) ![rustc >= 1.91.0](https://img.shields.io/badge/rustc-%3E%3D1.91.0-brightgreen) ![MIT/Apache 2](https://img.shields.io/crates/l/tract) [![Native Linux test status](https://github.com/snipsco/tract/workflows/Native%20Linux/badge.svg)](https://github.com/snipsco/tract/actions) [![Embedded targets status](https://github.com/snipsco/tract/workflows/Embedded%20targets/badge.svg)](https://github.com/snipsco/tract/actions) [![Doc](https://docs.rs/tract-core/badge.svg)](https://docs.rs/tract-core) [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](https://pypi.org/project/tract/) Sonos' Neural Network inference engine. _This project used to be called tfdeploy, or Tensorflow-deploy-rust._ ## What ? `tract` is a Neural Network inference toolkit. It can read ONNX or NNEF, optimize them and run them. ## Quick start, examples * [MobileNet v2 with ONNX](examples/onnx-mobilenet-v2) * [BERT example with ONNX](examples/pytorch-albert-v2) * [MobileNet v2 with TensorFlow](examples/tensorflow-mobilenet-v2) * [From Keras and TensorFlow 2 to tract](examples/keras-tract-tf2) * [ResNet with PyTorch](examples/pytorch-resnet) There is also [some technical documentation](doc/) and [blog](https://tech-blog.sonos.com/posts/optimising-a-neural-network-for-inference/) posts. ## Tract in the landscape ### ONNX As of today, `tract` passes successfully about 85% of ONNX backends tests. All "real life" integration tests in ONNX test suite are passing: bvlc_alexnet, densenet121, inception_v1, inception_v2, resnet50, shufflenet, squeezenet, vgg19, zfnet512. 
Notable missing parts are operators dealing with Tensor Sequences and Optional Tensors : tract /really/ wants to flow Tensors and nothing else. This is structural. Changing it would be pretty difficult, and it's unclear whether it can be done without impairing performance or maintainability. We are not convinced these features have shown their interest in the wild yet, so we prefer to leave them aside. Other dark corners are specific operators like "Resize" which fit perfectly in the framework but need a complex internal logic that is far from our core business. In these cases, we are happy to accept contributions and to help. The following operators are implemented and tested. Abs, Acos, Acosh, Add, And, ArgMax, ArgMin, ArrayFeatureExtractor, Asin, Asinh, Atan, Atanh, AveragePool, BatchNormalization, BitShift, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, BlackmanWindow, Cast, CastLike, CategoryMapper, Ceil, Clip, Compress, Concat, Constant, ConstantLike, ConstantOfShape, Conv, ConvInteger, ConvTranspose, Cos, Cosh, CumSum, DFT, DepthToSpace, DequantizeLinear, Div, Dropout, DynamicQuantizeLinear, Einsum, Elu, Equal, Erf, Exp, Expand, EyeLike, Flatten, Floor, GRU, Gather, GatherElements, GatherND, Gemm, GlobalAveragePool, GlobalLpPool, GlobalMaxPool, Greater, GreaterOrEqual, HammingWindow, HannWindow, HardSigmoid, Hardmax, Identity, If, InstanceNormalization, IsInf, IsNaN, LRN, LSTM, LeakyRelu, Less, LessOrEqual, Log, LogSoftmax, MatMul, MatMulInteger, Max, MaxPool, Mean, MelWeightMatrix, Min, Mod, Mul, Multinomial, Neg, NonMaxSuppression, NonZero, Not, OneHot, Or, PRelu, Pad, ParametricSoftplus, Pow, QLinearConv, QLinearMatMul, QuantizeLinear, RNN, RandomNormal, RandomNormalLike, RandomUniform, RandomUniformLike, Range, Reciprocal, ReduceL1, ReduceL2, ReduceLogSum, ReduceLogSumExp, ReduceMax, ReduceMean, ReduceMin, ReduceProd, ReduceSum, ReduceSumSquare, Relu, Reshape, Resize, Round, Rsqrt, STFT, ScaledTanh, Scan, Scatter, ScatterElements, ScatterND, Selu, Shape, 
Shrink, Sigmoid, Sign, Sin, Sinh, Size, Slice, Softmax, Softplus, Softsign, SpaceToDepth, Split, Sqrt, Squeeze, Sub, Sum, Tan, Tanh, ThresholdedRelu, Tile, Transpose, TreeEnsembleClassifier, Unsqueeze, Where, Xor We test these operators against ONNX from 1.4.1 (operator set 9), up to ONNX 1.13.0 (operator set 18). We are using the ONNX test suite, but it does not cover everything. We also deliberately ignore some tests, or restrict their scope, depending on what we feel is realistic. Sometimes these decisions are just wrong, and sometimes they become wrong as time goes by and the field moves in unexpected directions. So if you are puzzled by an ONNX model that does not work in tract, we are happy to take a look. ### NNEF Long story short, TensorFlow and ONNX formats are good for designing and training networks. They need to move fast to follow the research field, and tend to integrate new features and operators greedily. They also exhibit a high level of expressivity to facilitate network design. On the other hand, only a subset of operators and network features actually reach production, so systems running production networks do not have to deal with so many operators. Furthermore, some information required for training can be stripped from the network before going to production for prediction. NNEF tries to bridge the gap between training frameworks and inference by proposing a format dedicated to production and prediction. Tract supports NNEF: * tract_nnef can load and execute NNEF networks * tract supports most of the NNEF specification, the most notable exception being the ROI operators * tract introduces tract-OPL, a series of NNEF extensions to support other operators (or extend some operators' semantics) in order to represent the full range of tract-core neural network support: any network understood by tract should be serializable to tract-OPL. This is a work in progress. * tract command line can translate networks from TensorFlow or ONNX to NNEF/OPL. 
### tract-opl version compatibility A reminder: NNEF is not expressive enough to represent all of ONNX. tract-OPL extends NNEF with proprietary extensions to support what is missing. Notable extensions are pulse operators, recurrent operators (such as Scan) and symbolic extensions. There is no strict check in place here, so... the implementation is not bulletproof. * The NNEF part aims at being very stable. It is strongly constrained by compatibility with the NNEF specification. * tract-opl is a bit more in flux. Nevertheless we try to maintain the following golden rule: `models serialized with tract 0.x.y should work with tract 0.x.z where z >= y` * in practice, breaking changes have been relatively rare so far. Most models are forward and retro compatible from when tract acquired NNEF support. Notable breakage occurred: * 0.16.3 (forward compatible) on the Scan operator * 0.17.0 for the binary decision tree classifier Starting with `0.17.0`, a model property is injected in tract-opl files (`tract_nnef_ser_version`) to tag which version of tract generated the file. As most models will remain compatible, tract will not do any version check. It is up to the application developer to do so. A softer version tag exists as `tract_nnef_format_version`. pre-0.17.0 versions set it to `alpha1`, post-0.17.0 versions set it to `beta1`. Don't put too much emphasis on the "alpha-ness" naming of versions here. ### Note: support for TensorFlow 1.x Even if `tract` is very far from supporting any arbitrary model, it can run Google Inception v3 and Snips wake word models. Missing operators are relatively easy to add. The lack of an easy-to-reuse test suite, and the wide diversity of operators in Tensorflow, make it difficult to target full support. 
The following operators are implemented and tested: Abs, Add, AddN, AddV2, Assign, AvgPool, BatchToSpaceND, BiasAdd, BlockLSTM, Cast, Ceil, ConcatV2, Const, Conv2D, DepthwiseConv2dNative, Div, Enter, Equal, Exit, ExpandDims, FakeQuantWithMinMaxVars, Fill, FloorMod, FusedBatchNorm, GatherNd, GatherV2, Greater, GreaterEqual, Identity, Less, LessEqual, Log, LogicalAnd, LogicalOr, LoopCond, MatMul, Max, MaxPool, Maximum, Mean, Merge, Min, Minimum, Mul, Neg, NoOp, Pack, Pad, Placeholder, Pow, Prod, RandomUniform, RandomUniformInt, Range, RealDiv, Relu, Relu6, Reshape, Rsqrt, Shape, Sigmoid, Slice, Softmax, SpaceToBatchND, Squeeze, StridedSlice, Sub, Sum, Switch, Tanh, Tile, Transpose, VariableV2 Additionally, the complexity of TensorFlow 2 makes it very unlikely that direct support will ever exist in tract. But many TensorFlow 2 models can be converted to ONNX and then loaded in tract. ## Example of supported networks These models, among others, are used to track tract performance evolution as part of the Continuous Integration jobs. See [.travis/README.md](.travis/README.md) and [.travis/bundle-entrypoint.sh](.travis/bundle-entrypoint.sh) for more information. ### Keyword spotting on Arm Cortex-M Microcontrollers https://github.com/ARM-software/ML-KWS-for-MCU ARM demonstrated the capabilities of the Cortex-M family by providing tutorials and pre-trained models for keyword spotting. While the exercise is ultimately meant for micro-controllers, `tract` can run the intermediate TensorFlow models. For instance, on a Raspberry Pi Zero, the "CNN M" model runs in about 70 micro-seconds, and 11 micro-seconds on a Raspberry Pi 3. ### Snips wake word models https://arxiv.org/abs/1811.07684 Snips uses `tract` to run the wake word detectors. While earlier models were class-based and did not require any special treatment, `tract`'s pulsing capabilities made it possible to run WaveNet models efficiently enough for a Raspberry Pi Zero. 
### Inception v3 | Device | Family | TensorFlow-lite | tract | |---------------------|----------------|-------------------|---------| | Raspberry Pi Zero | Armv6 VFP | 113s | 39s | | Raspberry Pi 2 | Armv7 NEON | 25s | 7s | | Raspberry Pi 3 | aarch32 NEON | 5s | 5s | Notes: * while the Raspberry Pi 3 is an Armv8 device, this bench is running on Raspbian, an armv6 operating system, crippling the performance of both benches * there exist other benches on the internet that show better performance results for TensorFlow (not -Lite) on the Pi 3. They use all four cores of the device. Both TensorFlow-Lite and tract here have been made to run on a single core. # License Note: files in the `tensorflow/protos` directory are copied from the [TensorFlow](https://github.com/tensorflow/tensorflow) project and are not covered by the following license statement. Note: files in the `onnx/protos` directory are copied from the [ONNX](https://github.com/onnx/onnx) project and are not covered by the following license statement. ## Apache 2.0/MIT All original work licensed under either of * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. ## Contribution Unless you explicitly state otherwise, any Contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
================================================ FILE: api/.gitignore ================================================ *.nnef.tgz *.onnx ================================================ FILE: api/Cargo.toml ================================================ [package] name = "tract-api" version = "0.23.0-pre" license = "MIT OR Apache-2.0" authors = ["Mathieu Poumeyrol "] description = "Tiny, no-nonsense, self contained, TensorFlow and ONNX inference" repository = "https://github.com/sonos/tract" keywords = [ "NeuralNetworks" ] categories = [ "science" ] autobenches = false edition = "2024" rust-version.workspace = true include = [ "Cargo.toml", "src/**/*.rs", "LICENSE*", "tract.h" ] [dependencies] anyhow.workspace = true boow.workspace = true flate2.workspace = true half.workspace = true ndarray.workspace = true serde.workspace = true serde_json.workspace = true [features] complex = [] [dev-dependencies] lazy_static = "1.4.0" reqwest.workspace = true tempfile.workspace = true ================================================ FILE: api/LICENSE ================================================ ## License Licensed under either of * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. ### Contribution Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. ================================================ FILE: api/LICENSE-APACHE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: api/LICENSE-MIT ================================================ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: api/c/.gitignore ================================================ *.h *.so mobilenet mobilenet_v2_1.0.onnx.nnef.tgz ================================================ FILE: api/c/Makefile ================================================ run: mobilenet mobilenet_v2_1.0.onnx.nnef.tgz LD_LIBRARY_PATH=. ./mobilenet clean: rm -f mobilenet libtract.so tract.h mobilenet: tract.h libtract.so mobilenet.c cc mobilenet.c -o mobilenet -L. -ltract libtract.so: cargo build -p tract-ffi --profile opt-no-lto cp ../../target/opt-no-lto/libtract.so . tract.h: cd ../ffi ; cbindgen -l c > tract.h cp ../ffi/tract.h . mobilenet_v2_1.0.onnx.nnef.tgz: wget -q https://s3.amazonaws.com/tract-ci-builds/tests/mobilenet_v2_1.0.onnx.nnef.tgz ================================================ FILE: api/c/mobilenet.c ================================================ #include "tract.h" #include #include #include #define check(call) \ { \ TRACT_RESULT result = call; \ if (result == TRACT_RESULT_KO) { \ fprintf(stderr, "Error calling tract: %s", \ tract_get_last_error()); \ exit(1); \ } \ } int main() { // Initialize nnef parser TractNnef *nnef = NULL; check(tract_nnef_create(&nnef)); assert(nnef); // Load the model TractModel *model = NULL; check(tract_nnef_load(nnef, "mobilenet_v2_1.0.onnx.nnef.tgz", &model)); assert(model); assert(nnef); // once the model is build, the framework is not necessary anymore check(tract_nnef_destroy(&nnef)); assert(!nnef); // Pick a runtime TractRuntime *runtime = NULL; check(tract_runtime_for_name("default", &runtime)); assert(runtime); // Make the model runnable TractRunnable *runnable = NULL; check(tract_runtime_prepare(runtime, &model, &runnable)); 
assert(runnable); assert(!model); float *image = malloc(3 * 224 * 224 * sizeof(float)); FILE *fd = fopen("grace_hopper_3_224_224.f32.raw", "rb"); assert(fread(image, sizeof(float), 3 * 224 * 224, fd) == 3 * 224 * 224); fclose(fd); TractTensor *input = NULL; size_t shape[] = {1, 3, 224, 224}; check( tract_tensor_from_bytes(TRACT_DATUM_TYPE_F32, 4, shape, image, &input)); free(image); TractTensor *output = NULL; // simple stateless run... check(tract_runnable_run(runnable, &input, &output)); const float *data = NULL; check(tract_tensor_as_bytes(output, NULL, NULL, NULL, (const void **)&data)); float max = data[0]; int argmax = 0; for (int i = 0; i < 1000; i++) { float val = data[i]; if (val > max) { max = val; argmax = i; } } printf("Max is %f for category %d\n", max, argmax); check(tract_tensor_destroy(&output)); // or spawn a state to run the model TractState *state = NULL; check(tract_runnable_spawn_state(runnable, &state)); assert(state); // runnable is refcounted by the spawned states, so we can release it now. 
check(tract_runnable_release(&runnable)); assert(!runnable); check(tract_state_run(state, &input, &output)); check(tract_tensor_as_bytes(output, NULL, NULL, NULL, (const void **)&data)); assert(data[argmax] == max); check(tract_tensor_destroy(&output)); // done with out state and input check(tract_state_destroy(&state)); check(tract_tensor_destroy(&input)); } ================================================ FILE: api/ffi/Cargo.toml ================================================ [package] name = "tract-ffi" version = "0.23.0-pre" license = "MIT OR Apache-2.0" authors = ["Mathieu Poumeyrol "] description = "Tiny, no-nonsense, self contained, neural network inference" repository = "https://github.com/snipsco/tract" keywords = [ "TensorFlow", "NeuralNetworks" ] categories = [ "science" ] autobenches = false edition = "2024" include = [ "Cargo.toml", "src/**/*.rs", "LICENSE*" ] [lib] name = "tract" crate-type = ["cdylib"] [badges] maintenance = { status = "actively-developed" } [dependencies] anyhow.workspace = true flate2.workspace = true serde.workspace = true serde_json.workspace = true tract-api.workspace = true tract-rs = { version = "0.23.0-pre", path = "../rs", package = "tract" } ================================================ FILE: api/ffi/cbindgen.toml ================================================ language = "C" after_includes = """ typedef enum DatumType { TRACT_DATUM_TYPE_BOOL = 1, TRACT_DATUM_TYPE_U8 = 17, TRACT_DATUM_TYPE_U16 = 18, TRACT_DATUM_TYPE_U32 = 20, TRACT_DATUM_TYPE_U64 = 24, TRACT_DATUM_TYPE_I8 = 33, TRACT_DATUM_TYPE_I16 = 34, TRACT_DATUM_TYPE_I32 = 36, TRACT_DATUM_TYPE_I64 = 40, TRACT_DATUM_TYPE_F16 = 50, TRACT_DATUM_TYPE_F32 = 52, TRACT_DATUM_TYPE_F64 = 56, TRACT_DATUM_TYPE_COMPLEX_I16 = 66, TRACT_DATUM_TYPE_COMPLEX_I32 = 68, TRACT_DATUM_TYPE_COMPLEX_I64 = 72, TRACT_DATUM_TYPE_COMPLEX_F16 = 82, TRACT_DATUM_TYPE_COMPLEX_F32 = 84, TRACT_DATUM_TYPE_COMPLEX_F64 = 88, } DatumType; """ ================================================ FILE: 
api/ffi/src/lib.rs
================================================
#![allow(clippy::missing_safety_doc)]
use anyhow::{Context, Result};
use std::cell::RefCell;
use std::ffi::{CStr, CString, c_char, c_void};
use tract_api::{
    AsFact, DatumType, DimInterface, FactInterface, InferenceModelInterface, ModelInterface,
    NnefInterface, OnnxInterface, RunnableInterface, RuntimeInterface, StateInterface,
    TensorInterface,
};
use tract_rs::{State, Tensor};

/// Used as a return type of functions that can encounter errors.
/// If the function encountered an error, you can retrieve it using the `tract_get_last_error`
/// function
#[repr(C)]
#[allow(non_camel_case_types)]
#[derive(Debug, PartialEq, Eq)]
pub enum TRACT_RESULT {
    /// The function returned successfully
    TRACT_RESULT_OK = 0,
    /// The function returned an error
    TRACT_RESULT_KO = 1,
}

thread_local! {
    // Per-thread slot holding the last error message, exposed to C through
    // `tract_get_last_error`.
    // NOTE(review): the generic parameters were stripped by extraction; restored as
    // `RefCell<Option<CString>>`, the only type consistent with the `Some(CString::new(..))`
    // stored by `wrap` below.
    pub(crate) static LAST_ERROR: RefCell<Option<CString>> = const { RefCell::new(None) };
}

/// Run `func`, mapping its `Result` onto the C-friendly `TRACT_RESULT` codes and
/// stashing any error message into the thread-local `LAST_ERROR` slot.
// NOTE(review): the generic bound was stripped by extraction; restored as
// `F: FnOnce() -> anyhow::Result<()>`, the only bound compatible with the `func()`
// call and the `anyhow::bail!` uses throughout this file.
fn wrap<F: FnOnce() -> anyhow::Result<()>>(func: F) -> TRACT_RESULT {
    match func() {
        Ok(_) => TRACT_RESULT::TRACT_RESULT_OK,
        Err(e) => {
            let msg = format!("{e:?}");
            // Optional debugging aid: mirror the error on stderr when requested.
            if std::env::var("TRACT_ERROR_STDERR").is_ok() {
                eprintln!("{msg}");
            }
            LAST_ERROR.with(|p| {
                *p.borrow_mut() = Some(CString::new(msg).unwrap_or_else(|_| {
                    CString::new("tract error message contains 0, can't convert to CString")
                        .unwrap()
                }))
            });
            TRACT_RESULT::TRACT_RESULT_KO
        }
    }
}

/// Retrieve the last error that happened in this thread. A function encountered an error if
/// its return type is of type `TRACT_RESULT` and it returned `TRACT_RESULT_KO`.
///
/// # Return value
/// It returns a pointer to a null-terminated UTF-8 string that will contain the error description.
/// Rust side keeps ownership of the buffer. It will be valid as long as no other tract calls is
/// performed by the thread.
/// If no error occurred, null is returned.
#[unsafe(no_mangle)]
pub extern "C" fn tract_get_last_error() -> *const std::ffi::c_char {
    // Hand out a pointer into the thread-local buffer, or null when no error is recorded.
    LAST_ERROR.with(|slot| match slot.borrow().as_ref() {
        Some(message) => message.as_ptr(),
        None => std::ptr::null(),
    })
}

/// Returns a pointer to a static buffer containing a null-terminated version string.
///
/// The returned pointer must not be freed.
#[unsafe(no_mangle)]
pub extern "C" fn tract_version() -> *const std::ffi::c_char {
    // The version string is baked in at compile time, with an explicit trailing NUL byte.
    let bytes = concat!(env!("CARGO_PKG_VERSION"), "\0").as_bytes();
    unsafe { CStr::from_bytes_with_nul_unchecked(bytes).as_ptr() }
}

/// Frees a string allocated by libtract.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_free_cstring(ptr: *mut std::ffi::c_char) {
    unsafe {
        if ptr.is_null() {
            return;
        }
        // Re-own the CString so that it is dropped (and its buffer freed) here.
        drop(CString::from_raw(ptr));
    }
}

macro_rules! check_not_null {
    ($($ptr:expr),*) => {
        $(
            if $ptr.is_null() {
                anyhow::bail!(concat!("Unexpected null pointer ", stringify!($ptr)));
            }
        )*
    }
}

macro_rules! release {
    ($ptr:expr) => {
        wrap(|| unsafe {
            check_not_null!($ptr, *$ptr);
            let _ = Box::from_raw(*$ptr);
            *$ptr = std::ptr::null_mut();
            Ok(())
        })
    };
}

// NNEF

pub struct TractNnef(tract_rs::Nnef);

/// Creates an instance of an NNEF framework and parser that can be used to load and dump NNEF models.
///
/// The returned object should be destroyed with `tract_nnef_destroy` once the model
/// has been loaded.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_create(nnef: *mut *mut TractNnef) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        *nnef = Box::into_raw(Box::new(TractNnef(tract_rs::nnef()?)));
        Ok(())
    })
}

/// Enable the `tract_core` operator set on this NNEF parser.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_tract_core(nnef: *mut TractNnef) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_tract_core()
    })
}

/// Enable the `tract_extra` operator set on this NNEF parser.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_tract_extra(nnef: *mut TractNnef) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_tract_extra()
    })
}

/// Enable the `tract_transformers` operator set on this NNEF parser.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_tract_transformers(
    nnef: *mut TractNnef,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_tract_transformers()
    })
}

/// Enable the ONNX operator set on this NNEF parser.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_onnx(nnef: *mut TractNnef) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_onnx()
    })
}

/// Enable the pulse operator set on this NNEF parser.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_pulse(nnef: *mut TractNnef) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_pulse()
    })
}

/// Allow this parser to accept tract's extended identifier syntax in NNEF documents.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_extended_identifier_syntax(
    nnef: *mut TractNnef,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_extended_identifier_syntax()
    })
}

/// Destroy the NNEF parser. It is safe to destroy the NNEF parser once the model had been loaded.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_destroy(nnef: *mut *mut TractNnef) -> TRACT_RESULT {
    release!(nnef)
}

/// Parse and load an NNEF model as a tract TypedModel.
///
/// `path` is a null-terminated utf-8 string pointer. It can be an archive (tar or tar.gz file) or a
/// directory.
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_nnef_load( nnef: *const TractNnef, path: *const c_char, model: *mut *mut TractModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(nnef, model, path); *model = std::ptr::null_mut(); let path = CStr::from_ptr(path).to_str()?; let m = Box::new(TractModel( (*nnef).0.load(path).with_context(|| format!("opening file {path:?}"))?, )); *model = Box::into_raw(m); Ok(()) }) } /// Parse and load an NNEF buffer as a tract TypedModel. /// /// `data` is a buffer pointer /// `len` ise the buffer len #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_nnef_load_buffer( nnef: *const TractNnef, data: *const c_void, len: usize, model: *mut *mut TractModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(nnef, model, data); *model = std::ptr::null_mut(); let slice = std::slice::from_raw_parts(data as *const u8, len); let m = Box::new(TractModel((*nnef).0.load_buffer(slice)?)); *model = Box::into_raw(m); Ok(()) }) } /// Dump a TypedModel as a NNEF tar file. /// /// `path` is a null-terminated utf-8 string pointer to the `.tar` file to be created. /// /// This function creates a plain, non-compressed, archive. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_nnef_write_model_to_tar( nnef: *const TractNnef, path: *const c_char, model: *const TractModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(nnef, model, path); let path = CStr::from_ptr(path).to_str()?; (*nnef).0.write_model_to_tar(path, &(*model).0)?; Ok(()) }) } /// Dump a TypedModel as a NNEF .tar.gz file. /// /// `path` is a null-terminated utf-8 string pointer to the `.tar.gz` file to be created. 
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_write_model_to_tar_gz(
    nnef: *const TractNnef,
    path: *const c_char,
    model: *const TractModel,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef, model, path);
        let path = CStr::from_ptr(path).to_str()?;
        (*nnef).0.write_model_to_tar_gz(path, &(*model).0)?;
        Ok(())
    })
}

/// Dump a TypedModel as a NNEF directory.
///
/// `path` is a null-terminated utf-8 string pointer to the directory to be created.
// FIX: the previous doc claimed this "creates a plain, non-compressed, archive",
// a copy-paste leftover from the tar variant; this entry point writes a directory tree.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_write_model_to_dir(
    nnef: *const TractNnef,
    path: *const c_char,
    model: *const TractModel,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef, model, path);
        let path = CStr::from_ptr(path).to_str()?;
        (*nnef).0.write_model_to_dir(path, &(*model).0)?;
        Ok(())
    })
}

// ONNX

pub struct TractOnnx(tract_rs::Onnx);

/// Creates an instance of an ONNX framework and parser that can be used to load models.
///
/// The returned object should be destroyed with `tract_onnx_destroy` once the model
/// has been loaded.
// FIX: the previous doc told callers to use `tract_nnef_destroy` on a TractOnnx pointer.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_onnx_create(onnx: *mut *mut TractOnnx) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(onnx);
        *onnx = Box::into_raw(Box::new(TractOnnx(tract_rs::onnx()?)));
        Ok(())
    })
}

/// Destroy the ONNX parser. It is safe to destroy the ONNX parser once the model had been loaded.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_onnx_destroy(onnx: *mut *mut TractOnnx) -> TRACT_RESULT {
    release!(onnx)
}

/// Parse and load an ONNX model as a tract InferenceModel.
///
/// `path` is a null-terminated utf-8 string pointer. It must point to a `.onnx` model file.
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_onnx_load( onnx: *const TractOnnx, path: *const c_char, model: *mut *mut TractInferenceModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(onnx, path, model); *model = std::ptr::null_mut(); let path = CStr::from_ptr(path).to_str()?; let m = Box::new(TractInferenceModel((*onnx).0.load(path)?)); *model = Box::into_raw(m); Ok(()) }) } /// Parse and load an ONNX buffer as a tract InferenceModel. /// /// `data` is a buffer pointer /// `len` ise the buffer len #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_onnx_load_buffer( onnx: *const TractOnnx, data: *const c_void, len: usize, model: *mut *mut TractInferenceModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(onnx, model, data); *model = std::ptr::null_mut(); let slice = std::slice::from_raw_parts(data as *const u8, len); let m = Box::new(TractInferenceModel((*onnx).0.load_buffer(slice)?)); *model = Box::into_raw(m); Ok(()) }) } // INFERENCE MODEL pub struct TractInferenceModel(tract_rs::InferenceModel); /// Query an InferenceModel input counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_input_count( model: *const TractInferenceModel, inputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, inputs); let model = &(*model).0; *inputs = model.input_count()?; Ok(()) }) } /// Query an InferenceModel output counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_output_count( model: *const TractInferenceModel, outputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, outputs); let model = &(*model).0; *outputs = model.output_count()?; Ok(()) }) } /// Query the name of a model input. /// /// The returned name must be freed by the caller using tract_free_cstring. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_input_name( model: *const TractInferenceModel, input: usize, name: *mut *mut c_char, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name); *name = std::ptr::null_mut(); let m = &(*model).0; *name = CString::new(&*m.input_name(input)?)?.into_raw(); Ok(()) }) } /// Query the name of a model output. /// /// The returned name must be freed by the caller using tract_free_cstring. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_output_name( model: *const TractInferenceModel, output: usize, name: *mut *mut i8, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name); *name = std::ptr::null_mut(); let m = &(*model).0; *name = CString::new(&*m.output_name(output)?)?.into_raw() as _; Ok(()) }) } /// Query a model input fact. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_input_fact( model: *const TractInferenceModel, input_id: usize, fact: *mut *mut TractInferenceFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, fact); *fact = std::ptr::null_mut(); let f = (*model).0.input_fact(input_id)?; *fact = Box::into_raw(Box::new(TractInferenceFact(f))); Ok(()) }) } /// Set an input fact of an InferenceModel. /// /// The `fact` argument is only borrowed by this function, it still must be destroyed. /// `fact` can be set to NULL to erase the current output fact of the model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_set_input_fact( model: *mut TractInferenceModel, input_id: usize, fact: *const TractInferenceFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model); let f = fact.as_ref().map(|f| &f.0).cloned().unwrap_or_default(); (*model).0.set_input_fact(input_id, f)?; Ok(()) }) } /// Query an output fact for an InferenceModel. /// /// The return model must be freed using `tract_inference_fact_destroy`. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_output_fact( model: *const TractInferenceModel, output_id: usize, fact: *mut *mut TractInferenceFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, fact); *fact = std::ptr::null_mut(); let f = (*model).0.output_fact(output_id)?; *fact = Box::into_raw(Box::new(TractInferenceFact(f))); Ok(()) }) } /// Set an output fact of an InferenceModel. /// /// The `fact` argument is only borrowed by this function, it still must be destroyed. /// `fact` can be set to NULL to erase the current output fact of the model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_set_output_fact( model: *mut TractInferenceModel, output_id: usize, fact: *const TractInferenceFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model); let f = fact.as_ref().map(|f| &f.0).cloned().unwrap_or_default(); (*model).0.set_output_fact(output_id, f)?; Ok(()) }) } /// Analyse an InferencedModel in-place. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_analyse( model: *mut TractInferenceModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model); (*model).0.analyse()?; Ok(()) }) } /// Transform a fully analysed InferenceModel to a TypedModel. /// /// This function takes ownership of the InferenceModel `model` whether it succeeds /// or not. `tract_inference_model_destroy` must not be used on `model`. /// /// On the other hand, caller will be owning the newly created typed model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_into_model( model: *mut *mut TractInferenceModel, typed: *mut *mut TractModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, *model, typed); *typed = std::ptr::null_mut(); let m = Box::from_raw(*model); *model = std::ptr::null_mut(); let result = m.0.into_model()?; *typed = Box::into_raw(Box::new(TractModel(result))) as _; Ok(()) }) } /// Destroy an InferenceModel. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_destroy( model: *mut *mut TractInferenceModel, ) -> TRACT_RESULT { release!(model) } // TYPED MODEL pub struct TractModel(tract_rs::Model); /// Query an InferenceModel input counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_input_count( model: *const TractModel, inputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, inputs); let model = &(*model).0; *inputs = model.input_count()?; Ok(()) }) } /// Query an InferenceModel output counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_output_count( model: *const TractModel, outputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, outputs); let model = &(*model).0; *outputs = model.output_count()?; Ok(()) }) } /// Query the name of a model input. /// /// The returned name must be freed by the caller using tract_free_cstring. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_input_name( model: *const TractModel, input: usize, name: *mut *mut c_char, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name); *name = std::ptr::null_mut(); let m = &(*model).0; *name = CString::new(m.input_name(input)?)?.into_raw(); Ok(()) }) } /// Query the input fact of a model. /// /// Thre returned fact must be freed with tract_fact_destroy. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_input_fact( model: *const TractModel, input_id: usize, fact: *mut *mut TractFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, fact); *fact = std::ptr::null_mut(); let f = (*model).0.input_fact(input_id)?; *fact = Box::into_raw(Box::new(TractFact(f))); Ok(()) }) } /// Query the name of a model output. /// /// The returned name must be freed by the caller using tract_free_cstring. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_output_name( model: *const TractModel, output: usize, name: *mut *mut c_char, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name); *name = std::ptr::null_mut(); let m = &(*model).0; *name = CString::new(m.output_name(output)?)?.into_raw(); Ok(()) }) } /// Query the output fact of a model. /// /// Thre returned fact must be freed with tract_fact_destroy. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_output_fact( model: *const TractModel, input_id: usize, fact: *mut *mut TractFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, fact); *fact = std::ptr::null_mut(); let f = (*model).0.output_fact(input_id)?; *fact = Box::into_raw(Box::new(TractFact(f))); Ok(()) }) } /// Apply a transform to the model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_transform( model: *mut TractModel, transform: *const i8, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, transform); let t = CStr::from_ptr(transform as _) .to_str() .context("failed to parse transform name (not utf8)")?; (*model).0.transform(t) }) } /// Perform a profile of the model using the provided inputs. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_profile_json( model: *mut TractRunnable, inputs: *mut *mut TractTensor, json: *mut *mut i8, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, json); let input: Option> = if !inputs.is_null() { let input_len = (*model).0.input_count()?; Some( std::slice::from_raw_parts(inputs, input_len) .iter() .map(|tv| (**tv).0.clone()) .collect(), ) } else { None }; let profile = (*model).0.profile_json(input)?; *json = CString::new(profile)?.into_raw() as _; Ok(()) }) } /// Convert a TypedModel into a TypedRunnableModel. /// /// This function transfers ownership of the `model` argument to the newly-created `runnable` model. /// /// Runnable are reference counted. When done, it should be released with `tract_runnable_release`. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_into_runnable( model: *mut *mut TractModel, runnable: *mut *mut TractRunnable, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, runnable); let m = Box::from_raw(*model).0; *model = std::ptr::null_mut(); *runnable = Box::into_raw(Box::new(TractRunnable(m.into_runnable()?))) as _; Ok(()) }) } /// Query the number of properties in a model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_property_count( model: *const TractModel, count: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, count); *count = (*model).0.property_keys()?.len(); Ok(()) }) } /// Query the properties names of a model. /// /// The "names" array should be big enough to fit `tract_model_property_count` string pointers. /// /// Each name will have to be freed using `tract_free_cstring`. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_property_names( model: *const TractModel, names: *mut *mut i8, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, names); for (ix, name) in (*model).0.property_keys()?.iter().enumerate() { *names.add(ix) = CString::new(&**name)?.into_raw() as _; } Ok(()) }) } /// Query a property tensor in a model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_property( model: *const TractModel, name: *const i8, tensor: *mut *mut TractTensor, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name, tensor); let name = CStr::from_ptr(name as _) .to_str() .context("failed to parse property name (not utf8)")? .to_owned(); let v = (*model).0.property(name).context("Property not found")?; *tensor = Box::into_raw(Box::new(TractTensor(v))); Ok(()) }) } /// Parse a fact specification string into an Fact. /// /// The returned fact must be free with `tract_fact_destroy`. 
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_model_parse_fact(
    model: *mut TractModel,
    spec: *const c_char,
    fact: *mut *mut TractFact,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(model, spec, fact);
        let spec = CStr::from_ptr(spec).to_str()?;
        let f: tract_rs::Fact = spec.as_fact(&(*model).0)?.as_ref().clone();
        *fact = Box::into_raw(Box::new(TractFact(f)));
        Ok(())
    })
}

/// Destroy a TypedModel.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_model_destroy(model: *mut *mut TractModel) -> TRACT_RESULT {
    release!(model)
}

// RUNTIME MODEL

pub struct TractRuntime(tract_rs::Runtime);

/// Creates an instance of a tract Runtime that can be used to run model on a specific
/// hardware / software stack (like a GPU).
///
/// The returned object should be released with `tract_runtime_release`.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_runtime_for_name(
    name: *const c_char,
    nnef: *mut *mut TractRuntime,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        // FIX: `name` was dereferenced by CStr::from_ptr without a null check,
        // unlike every other entry point in this file. Check it so a null `name`
        // yields a clean TRACT_RESULT_KO instead of undefined behavior.
        check_not_null!(name, nnef);
        let name = CStr::from_ptr(name).to_str()?;
        *nnef = Box::into_raw(Box::new(TractRuntime(tract_rs::runtime_for_name(name)?)));
        Ok(())
    })
}

/// Query the name of a Runtime.
///
/// The returned name must be freed by the caller using tract_free_cstring.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_runtime_name(
    runtime: *const TractRuntime,
    name: *mut *mut c_char,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(runtime, name);
        *name = std::ptr::null_mut();
        let n = (*runtime).0.name()?;
        *name = CString::new(n)?.into_raw();
        Ok(())
    })
}

/// Convert a Model into a Runnable for this Runtime.
///
/// This function transfers ownership of the `model` argument to the newly-created `runnable` model.
///
/// Runnable are reference counted. When done, it should be released with `tract_runnable_release`.
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runtime_prepare( runtime: *const TractRuntime, model: *mut *mut TractModel, runnable: *mut *mut TractRunnable, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(runtime, model, runnable); let m = Box::from_raw(*model).0; *model = std::ptr::null_mut(); *runnable = Box::into_raw(Box::new(TractRunnable((*runtime).0.prepare(m)?))) as _; Ok(()) }) } #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runtime_release(runtime: *mut *mut TractRuntime) -> TRACT_RESULT { release!(runtime) } // RUNNABLE MODEL pub struct TractRunnable(tract_rs::Runnable); /// Spawn a session state from a runnable model. /// /// This function does not take ownership of the `runnable` object, it can be used again to spawn /// other state instances. The runnable object is internally reference counted, it will be /// kept alive as long as any associated `State` exists (or as long as the `runnable` is not /// explicitely release with `tract_runnable_release`). /// /// `state` is a newly-created object. It should ultimately be detroyed with `tract_state_destroy`. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_spawn_state( runnable: *mut TractRunnable, state: *mut *mut TractState, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(runnable, state); *state = std::ptr::null_mut(); let s = (*runnable).0.spawn_state()?; *state = Box::into_raw(Box::new(TractState(s))); Ok(()) }) } /// Convenience function to run a stateless model. /// /// `inputs` is a pointer to an pre-existing array of input TractTensor. Its length *must* be equal /// to the number of inputs of the models. The function does not take ownership of the input /// tensors. /// `outputs` is a pointer to a pre-existing array of TractTensor pointers that will be overwritten /// with pointers to output tensors. These tensors are under the responsiblity of the caller, it /// will have to release them with `tract_tensor_destroy`. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_run( runnable: *mut TractRunnable, inputs: *mut *mut TractTensor, outputs: *mut *mut TractTensor, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(runnable); let mut s = (*runnable).0.spawn_state()?; state_run(&mut s, inputs, outputs) }) } /// Query a Runnable input counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_input_count( model: *const TractRunnable, inputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, inputs); let model = &(*model).0; *inputs = model.input_count()?; Ok(()) }) } /// Query an Runnable output counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_output_count( model: *const TractRunnable, outputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, outputs); let model = &(*model).0; *outputs = model.output_count()?; Ok(()) }) } /// Query the input fact of a runnable model. /// /// Thre returned fact must be freed with tract_fact_destroy. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_input_fact( runnable: *const TractRunnable, input_id: usize, fact: *mut *mut TractFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(runnable, fact); *fact = std::ptr::null_mut(); let f = (*runnable).0.input_fact(input_id)?; *fact = Box::into_raw(Box::new(TractFact(f))); Ok(()) }) } /// Query the output fact of a runnable model. /// /// Thre returned fact must be freed with tract_fact_destroy. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_output_fact( runnable: *const TractRunnable, output_id: usize, fact: *mut *mut TractFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(runnable, fact); *fact = std::ptr::null_mut(); let f = (*runnable).0.output_fact(output_id)?; *fact = Box::into_raw(Box::new(TractFact(f))); Ok(()) }) } /// Query the number of properties in a runnable model. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_property_count( model: *const TractRunnable, count: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, count); *count = (*model).0.property_keys()?.len(); Ok(()) }) } /// Query the properties names of a runnable model. /// /// The "names" array should be big enough to fit `tract_model_property_count` string pointers. /// /// Each name will have to be freed using `tract_free_cstring`. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_property_names( model: *const TractRunnable, names: *mut *mut i8, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, names); for (ix, name) in (*model).0.property_keys()?.iter().enumerate() { *names.add(ix) = CString::new(&**name)?.into_raw() as _; } Ok(()) }) } /// Query a property tensor in a runnable model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_property( model: *const TractRunnable, name: *const i8, tensor: *mut *mut TractTensor, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name, tensor); let name = CStr::from_ptr(name as _) .to_str() .context("failed to parse property name (not utf8)")? .to_owned(); let v = (*model).0.property(name).context("Property not found")?; *tensor = Box::into_raw(Box::new(TractTensor(v))); Ok(()) }) } #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_release(runnable: *mut *mut TractRunnable) -> TRACT_RESULT { release!(runnable) } // TENSOR pub struct TractTensor(tract_rs::Tensor); /// Create a TractTensor from caller data and metadata. /// /// This call copies the data into tract space. All the pointers only need to be alive for the /// duration of the call. /// /// rank is the number of dimensions of the tensor (i.e. the length of the shape vector). /// /// The returned tensor must be destroyed by `tract_tensor_destroy`. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_tensor_from_bytes( datum_type: DatumType, rank: usize, shape: *const usize, data: *mut c_void, tensor: *mut *mut TractTensor, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(tensor); *tensor = std::ptr::null_mut(); let shape = std::slice::from_raw_parts(shape, rank); let len = shape.iter().product::(); let data = std::slice::from_raw_parts(data as *const u8, len * datum_type.size_of()); let it = Tensor::from_bytes(datum_type, shape, data)?; *tensor = Box::into_raw(Box::new(TractTensor(it))); Ok(()) }) } /// Write a tensor as a debug string /// /// The returned string must be freed by the caller using tract_free_cstring. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_tensor_dump( tensor: *const TractTensor, spec: *mut *mut c_char, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(tensor, spec); *spec = CString::new(format!("{:?}", (*tensor).0))?.into_raw(); Ok(()) }) } /// Convert a tensor to a new datum type. /// /// This function will perform a cheap shallow clone if the destination type is /// the same as the current type, otherwise it returns a newly allocated Tensor instead. /// /// In both cases, the returned tensor must be destroyed by `tract_tensor_destroy`. /// The input tensor is not consumed, it still need to be destroyed. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_tensor_convert_to( input: *const TractTensor, datum_type: DatumType, output: *mut *mut TractTensor, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(input, output); *output = std::ptr::null_mut(); let new = (*input).0.convert_to(datum_type)?; *output = Box::into_raw(Box::new(TractTensor(new))); Ok(()) }) } /// Destroy a tensor. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_tensor_destroy(tensor: *mut *mut TractTensor) -> TRACT_RESULT { release!(tensor) } /// Inspect part of a tensor. Except `tensor`, all argument pointers can be null if only some specific bits /// are required. 
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_tensor_as_bytes(
    tensor: *mut TractTensor,
    datum_type: *mut DatumType,
    rank: *mut usize,
    shape: *mut *const usize,
    data: *mut *const c_void,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(tensor);
        let tensor = &(*tensor).0;
        let bits = tensor.as_bytes()?;
        // Each out-parameter is optional: only fill the ones the caller asked for.
        if !datum_type.is_null() {
            *datum_type = bits.0;
        }
        if !rank.is_null() {
            *rank = bits.1.len();
        }
        if !shape.is_null() {
            // Points into the tensor's own storage; valid only while the tensor lives.
            *shape = bits.1.as_ptr();
        }
        if !data.is_null() {
            *data = bits.2.as_ptr() as _;
        }
        Ok(())
    })
}

// STATE

pub struct TractState(tract_rs::State);

/// Run a turn on a model state
///
/// `inputs` is a pointer to an pre-existing array of input TractTensor. Its length *must* be equal
/// to the number of inputs of the models. The function does not take ownership of the input
/// tensors.
/// `outputs` is a pointer to a pre-existing array of TractTensor pointers that will be overwritten
/// with pointers to output tensors. These tensors are under the responsibility of the caller, it
/// will have to release them with `tract_tensor_destroy`.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_state_run(
    state: *mut TractState,
    inputs: *mut *mut TractTensor,
    outputs: *mut *mut TractTensor,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(state, inputs, outputs);
        state_run(&mut (*state).0, inputs, outputs)
    })
}

/// Query a State input count.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_state_input_count(
    state: *const TractState,
    inputs: *mut usize,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(state, inputs);
        let state = &(*state).0;
        *inputs = state.input_count()?;
        Ok(())
    })
}

/// Query a State output count.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_state_output_count(
    state: *const TractState,
    outputs: *mut usize,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(state, outputs);
        let state = &(*state).0;
        *outputs = state.output_count()?;
        Ok(())
    })
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_state_destroy(state: *mut *mut TractState) -> TRACT_RESULT {
    release!(state)
}

// FACT

pub struct TractFact(tract_rs::Fact);

/// Gets the rank (aka number of axes/dimensions) of a fact.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_fact_rank(fact: *const TractFact, rank: *mut usize) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, rank);
        *rank = (*fact).0.rank()?;
        Ok(())
    })
}

/// Extract the datum type of the fact.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_fact_datum_type(
    fact: *const TractFact,
    datum_type: *mut DatumType,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, datum_type);
        *datum_type = (*fact).0.datum_type()?;
        Ok(())
    })
}

/// Extract the dimension from one dimension of the fact.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_fact_dim(
    fact: *const TractFact,
    axis: usize,
    dim: *mut *mut TractDim,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, dim);
        let d = (*fact).0.dim(axis)?;
        // Caller owns the returned dim; release it with tract_dim_destroy.
        *dim = Box::into_raw(Box::new(TractDim(d)));
        Ok(())
    })
}

/// Write a fact as its specification string.
///
/// The returned string must be freed by the caller using tract_free_cstring.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_fact_dump(
    fact: *const TractFact,
    spec: *mut *mut c_char,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, spec);
        *spec = CString::new(format!("{}", (*fact).0))?.into_raw();
        Ok(())
    })
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_fact_destroy(fact: *mut *mut TractFact) -> TRACT_RESULT {
    release!(fact)
}

// INFERENCE FACT

pub struct TractInferenceFact(tract_rs::InferenceFact);

/// Parse a fact specification string into an InferenceFact.
///
/// The returned fact must be freed with `tract_inference_fact_destroy`.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_inference_fact_parse(
    model: *mut TractInferenceModel,
    spec: *const c_char,
    fact: *mut *mut TractInferenceFact,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(model, spec, fact);
        let spec = CStr::from_ptr(spec).to_str()?;
        // The model is needed to resolve symbols appearing in the spec string.
        let f: tract_rs::InferenceFact = spec.as_fact(&(*model).0)?.as_ref().clone();
        *fact = Box::into_raw(Box::new(TractInferenceFact(f)));
        Ok(())
    })
}

/// Creates an empty inference fact.
///
/// The returned fact must be freed by the caller using tract_inference_fact_destroy
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_inference_fact_empty(
    fact: *mut *mut TractInferenceFact,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact);
        *fact = Box::into_raw(Box::new(TractInferenceFact(Default::default())));
        Ok(())
    })
}

/// Write an inference fact as its specification string.
///
/// The returned string must be freed by the caller using tract_free_cstring.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_inference_fact_dump(
    fact: *const TractInferenceFact,
    spec: *mut *mut c_char,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, spec);
        *spec = CString::new(format!("{}", (*fact).0))?.into_raw();
        Ok(())
    })
}

/// Destroy a fact.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_inference_fact_destroy(
    fact: *mut *mut TractInferenceFact,
) -> TRACT_RESULT {
    release!(fact)
}

/// Dim
pub struct TractDim(tract_rs::Dim);

/// Substitute symbols by the provided values in the Dim, generating a new one.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_dim_eval(
    dim: *const TractDim,
    nb_symbols: usize,
    symbols: *const *const i8,
    values: *const i64,
    result: *mut *mut TractDim,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(dim, symbols, values, result);
        // Build the (symbol name, value) substitution table from the parallel
        // `symbols` / `values` arrays, both of length `nb_symbols`.
        let mut table = vec![];
        for i in 0..nb_symbols {
            let name = CStr::from_ptr(*symbols.add(i) as _)
                .to_str()
                .with_context(|| {
                    format!("failed to parse symbol name for {i}th symbol (not utf8)")
                })?
                .to_owned();
            table.push((name, *values.add(i)));
        }
        let r = (*dim).0.eval(table)?;
        // Caller owns the returned dim; release it with tract_dim_destroy.
        *result = Box::into_raw(Box::new(TractDim(r)));
        Ok(())
    })
}

/// Try converting a Dim into an actual integer
///
/// Will fail if the Dim contains symbols.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_dim_to_int64(fact: *const TractDim, i: *mut i64) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, i);
        *i = (*fact).0.to_int64()?;
        Ok(())
    })
}

/// Write a dim as its specification string.
///
/// The returned string must be freed by the caller using tract_free_cstring.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_dim_dump(
    dim: *const TractDim,
    spec: *mut *mut c_char,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(dim, spec);
        *spec = CString::new((*dim).0.to_string())?.into_raw();
        Ok(())
    })
}

/// Destroy a dim.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_dim_destroy(dim: *mut *mut TractDim) -> TRACT_RESULT {
    release!(dim)
}

// MISC

// HELPERS

// Shared implementation behind tract_runnable_run and tract_state_run: reads
// `input_count` tensors from `inputs` (cloning them, ownership stays with the
// caller) and writes freshly boxed output tensors into `outputs`.
unsafe fn state_run(
    state: &mut State,
    inputs: *mut *mut TractTensor,
    outputs: *mut *mut TractTensor,
) -> Result<()> {
    unsafe {
        let values: Vec<_> = std::slice::from_raw_parts(inputs, state.input_count()?)
.iter() .map(|tv| (**tv).0.clone()) .collect(); let values = state.run(values)?; for (i, value) in values.into_iter().enumerate() { *(outputs.add(i)) = Box::into_raw(Box::new(TractTensor(value))) } Ok(()) } } ================================================ FILE: api/generate-tract-h.sh ================================================ #!/bin/sh set -ex cargo install cbindgen cbindgen ffi > tract.h cp tract.h c mv tract.h proxy/sys ================================================ FILE: api/proxy/Cargo.toml ================================================ [package] name = "tract-proxy" version = "0.23.0-pre" license = "MIT OR Apache-2.0" authors = ["Mathieu Poumeyrol "] description = "Tiny, no-nonsense, self contained, TensorFlow and ONNX inference" repository = "https://github.com/sonos/tract" keywords = [ "NeuralNetworks" ] categories = [ "science" ] autobenches = false edition = "2024" rust-version.workspace = true include = [ "Cargo.toml", "src/**/*.rs", "LICENSE*" ] [dependencies] anyhow.workspace = true boow.workspace = true home.workspace = true ndarray.workspace = true tract-api.workspace = true tract-proxy-sys.workspace = true [dev-dependencies] reqwest.workspace = true rustls.workspace = true tempfile.workspace = true serde_json.workspace = true ================================================ FILE: api/proxy/LICENSE ================================================ ## License Licensed under either of * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. ### Contribution Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
================================================ FILE: api/proxy/LICENSE-APACHE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: api/proxy/LICENSE-MIT ================================================ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: api/proxy/ci.sh ================================================ #!/bin/sh ROOT=$(dirname $(realpath $0))/../.. set -ex cargo build --release -p tract-ffi $CARGO_EXTRA SO=$(cargo build --message-format=json --release -p tract-ffi $CARGO_EXTRA | grep cdylib | jshon -e filenames -e 0 -u) SO_PATH=$(dirname $SO) export TRACT_DYLIB_SEARCH_PATH=$SO_PATH export LD_LIBRARY_PATH=$SO_PATH cd $(dirname $(realpath $0)) cargo test $CARGO_EXTRA ================================================ FILE: api/proxy/src/lib.rs ================================================ use std::ffi::{CStr, CString}; use std::path::Path; use std::ptr::{null, null_mut}; use tract_api::*; use tract_proxy_sys as sys; use anyhow::{Context, Result}; use ndarray::*; macro_rules! check { ($expr:expr) => { unsafe { if $expr == sys::TRACT_RESULT_TRACT_RESULT_KO { let buf = CStr::from_ptr(sys::tract_get_last_error()); Err(anyhow::anyhow!(buf.to_string_lossy().to_string())) } else { Ok(()) } } }; } macro_rules! 
wrapper { ($new_type:ident, $c_type:ident, $dest:ident $(, $typ:ty )*) => { #[derive(Debug, Clone)] pub struct $new_type(*mut sys::$c_type $(, $typ)*); impl Drop for $new_type { fn drop(&mut self) { unsafe { sys::$dest(&mut self.0); } } } }; } pub fn nnef() -> Result { let mut nnef = null_mut(); check!(sys::tract_nnef_create(&mut nnef))?; Ok(Nnef(nnef)) } pub fn onnx() -> Result { let mut onnx = null_mut(); check!(sys::tract_onnx_create(&mut onnx))?; Ok(Onnx(onnx)) } pub fn version() -> &'static str { unsafe { CStr::from_ptr(sys::tract_version()).to_str().unwrap() } } wrapper!(Nnef, TractNnef, tract_nnef_destroy); impl NnefInterface for Nnef { type Model = Model; fn load(&self, path: impl AsRef) -> Result { let path = path.as_ref(); let path = CString::new( path.to_str().with_context(|| format!("Failed to re-encode {path:?} to uff-8"))?, )?; let mut model = null_mut(); check!(sys::tract_nnef_load(self.0, path.as_ptr(), &mut model))?; Ok(Model(model)) } fn load_buffer(&self, data: &[u8]) -> Result { let mut model = null_mut(); check!(sys::tract_nnef_load_buffer(self.0, data.as_ptr() as _, data.len(), &mut model))?; Ok(Model(model)) } fn enable_tract_core(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_tract_core(self.0)) } fn enable_tract_extra(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_tract_extra(self.0)) } fn enable_tract_transformers(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_tract_transformers(self.0)) } fn enable_onnx(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_onnx(self.0)) } fn enable_pulse(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_pulse(self.0)) } fn enable_extended_identifier_syntax(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_extended_identifier_syntax(self.0)) } fn write_model_to_dir(&self, path: impl AsRef, model: &Model) -> Result<()> { let path = path.as_ref(); let path = CString::new( path.to_str().with_context(|| format!("Failed to re-encode {path:?} to uff-8"))?, )?; 
check!(sys::tract_nnef_write_model_to_dir(self.0, path.as_ptr(), model.0))?; Ok(()) } fn write_model_to_tar(&self, path: impl AsRef, model: &Model) -> Result<()> { let path = path.as_ref(); let path = CString::new( path.to_str().with_context(|| format!("Failed to re-encode {path:?} to uff-8"))?, )?; check!(sys::tract_nnef_write_model_to_tar(self.0, path.as_ptr(), model.0))?; Ok(()) } fn write_model_to_tar_gz(&self, path: impl AsRef, model: &Model) -> Result<()> { let path = path.as_ref(); let path = CString::new( path.to_str().with_context(|| format!("Failed to re-encode {path:?} to uff-8"))?, )?; check!(sys::tract_nnef_write_model_to_tar_gz(self.0, path.as_ptr(), model.0))?; Ok(()) } } // ONNX wrapper!(Onnx, TractOnnx, tract_onnx_destroy); impl OnnxInterface for Onnx { type InferenceModel = InferenceModel; fn load(&self, path: impl AsRef) -> Result { let path = path.as_ref(); let path = CString::new( path.to_str().with_context(|| format!("Failed to re-encode {path:?} to uff-8"))?, )?; let mut model = null_mut(); check!(sys::tract_onnx_load(self.0, path.as_ptr(), &mut model))?; Ok(InferenceModel(model)) } fn load_buffer(&self, data: &[u8]) -> Result { let mut model = null_mut(); check!(sys::tract_onnx_load_buffer(self.0, data.as_ptr() as _, data.len(), &mut model))?; Ok(InferenceModel(model)) } } // INFERENCE MODEL wrapper!(InferenceModel, TractInferenceModel, tract_inference_model_destroy); impl InferenceModelInterface for InferenceModel { type Model = Model; type InferenceFact = InferenceFact; fn input_count(&self) -> Result { let mut count = 0; check!(sys::tract_inference_model_input_count(self.0, &mut count))?; Ok(count) } fn output_count(&self) -> Result { let mut count = 0; check!(sys::tract_inference_model_output_count(self.0, &mut count))?; Ok(count) } fn input_name(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_model_input_name(self.0, id, &mut ptr))?; unsafe { let ret = CStr::from_ptr(ptr).to_str()?.to_owned(); 
sys::tract_free_cstring(ptr); Ok(ret) } } fn output_name(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_model_output_name(self.0, id, &mut ptr))?; unsafe { let ret = CStr::from_ptr(ptr).to_str()?.to_owned(); sys::tract_free_cstring(ptr); Ok(ret) } } fn input_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_model_input_fact(self.0, id, &mut ptr))?; Ok(InferenceFact(ptr)) } fn set_input_fact( &mut self, id: usize, fact: impl AsFact, ) -> Result<()> { let fact = fact.as_fact(self)?; check!(sys::tract_inference_model_set_input_fact(self.0, id, fact.0))?; Ok(()) } fn output_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_model_output_fact(self.0, id, &mut ptr))?; Ok(InferenceFact(ptr)) } fn set_output_fact( &mut self, id: usize, fact: impl AsFact, ) -> Result<()> { let fact = fact.as_fact(self)?; check!(sys::tract_inference_model_set_output_fact(self.0, id, fact.0))?; Ok(()) } fn analyse(&mut self) -> Result<()> { check!(sys::tract_inference_model_analyse(self.0))?; Ok(()) } fn into_model(mut self) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_model_into_model(&mut self.0, &mut ptr))?; Ok(Model(ptr)) } } // MODEL wrapper!(Model, TractModel, tract_model_destroy); impl ModelInterface for Model { type Fact = Fact; type Tensor = Tensor; type Runnable = Runnable; fn input_count(&self) -> Result { let mut count = 0; check!(sys::tract_model_input_count(self.0, &mut count))?; Ok(count) } fn output_count(&self) -> Result { let mut count = 0; check!(sys::tract_model_output_count(self.0, &mut count))?; Ok(count) } fn input_name(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_model_input_name(self.0, id, &mut ptr))?; unsafe { let ret = CStr::from_ptr(ptr).to_str()?.to_owned(); sys::tract_free_cstring(ptr); Ok(ret) } } fn output_name(&self, id: usize) -> Result { let mut ptr = null_mut(); 
check!(sys::tract_model_output_name(self.0, id, &mut ptr))?; unsafe { let ret = CStr::from_ptr(ptr).to_str()?.to_owned(); sys::tract_free_cstring(ptr); Ok(ret) } } fn input_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_model_input_fact(self.0, id, &mut ptr))?; Ok(Fact(ptr)) } fn output_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_model_output_fact(self.0, id, &mut ptr))?; Ok(Fact(ptr)) } fn into_runnable(self) -> Result { let mut model = self; let mut runnable = null_mut(); check!(sys::tract_model_into_runnable(&mut model.0, &mut runnable))?; Ok(Runnable(runnable)) } fn transform(&mut self, spec: impl Into) -> Result<()> { let transform = spec.into().to_transform_string(); let t = CString::new(transform)?; check!(sys::tract_model_transform(self.0, t.as_ptr()))?; Ok(()) } fn property_keys(&self) -> Result> { let mut len = 0; check!(sys::tract_model_property_count(self.0, &mut len))?; let mut keys = vec![null_mut(); len]; check!(sys::tract_model_property_names(self.0, keys.as_mut_ptr()))?; unsafe { keys.into_iter() .map(|pc| { let s = CStr::from_ptr(pc).to_str()?.to_owned(); sys::tract_free_cstring(pc); Ok(s) }) .collect() } } fn property(&self, name: impl AsRef) -> Result { let mut v = null_mut(); let name = CString::new(name.as_ref())?; check!(sys::tract_model_property(self.0, name.as_ptr(), &mut v))?; Ok(Tensor(v)) } fn parse_fact(&self, spec: &str) -> Result { let spec = CString::new(spec)?; let mut ptr = null_mut(); check!(sys::tract_model_parse_fact(self.0, spec.as_ptr(), &mut ptr))?; Ok(Fact(ptr)) } } // RUNTIME wrapper!(Runtime, TractRuntime, tract_runtime_release); pub fn runtime_for_name(name: &str) -> Result { let mut rt = null_mut(); let name = CString::new(name)?; check!(sys::tract_runtime_for_name(name.as_ptr(), &mut rt))?; Ok(Runtime(rt)) } impl RuntimeInterface for Runtime { type Runnable = Runnable; type Model = Model; fn name(&self) -> Result { let mut ptr = null_mut(); 
check!(sys::tract_runtime_name(self.0, &mut ptr))?; unsafe { let ret = CStr::from_ptr(ptr).to_str()?.to_owned(); sys::tract_free_cstring(ptr); Ok(ret) } } fn prepare(&self, model: Self::Model) -> Result { let mut model = model; let mut runnable = null_mut(); check!(sys::tract_runtime_prepare(self.0, &mut model.0, &mut runnable))?; Ok(Runnable(runnable)) } } // RUNNABLE wrapper!(Runnable, TractRunnable, tract_runnable_release); unsafe impl Send for Runnable {} unsafe impl Sync for Runnable {} impl RunnableInterface for Runnable { type Tensor = Tensor; type State = State; type Fact = Fact; fn run(&self, inputs: impl IntoInputs) -> Result> { StateInterface::run(&mut self.spawn_state()?, inputs.into_inputs()?) } fn spawn_state(&self) -> Result { let mut state = null_mut(); check!(sys::tract_runnable_spawn_state(self.0, &mut state))?; Ok(State(state)) } fn input_count(&self) -> Result { let mut count = 0; check!(sys::tract_runnable_input_count(self.0, &mut count))?; Ok(count) } fn output_count(&self) -> Result { let mut count = 0; check!(sys::tract_runnable_output_count(self.0, &mut count))?; Ok(count) } fn input_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_runnable_input_fact(self.0, id, &mut ptr))?; Ok(Fact(ptr)) } fn output_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_runnable_output_fact(self.0, id, &mut ptr))?; Ok(Fact(ptr)) } fn property_keys(&self) -> Result> { let mut len = 0; check!(sys::tract_runnable_property_count(self.0, &mut len))?; let mut keys = vec![null_mut(); len]; check!(sys::tract_runnable_property_names(self.0, keys.as_mut_ptr()))?; unsafe { keys.into_iter() .map(|pc| { let s = CStr::from_ptr(pc).to_str()?.to_owned(); sys::tract_free_cstring(pc); Ok(s) }) .collect() } } fn property(&self, name: impl AsRef) -> Result { let mut v = null_mut(); let name = CString::new(name.as_ref())?; check!(sys::tract_runnable_property(self.0, name.as_ptr(), &mut v))?; Ok(Tensor(v)) } fn 
cost_json(&self) -> Result { let input: Option> = None; self.profile_json(input) } fn profile_json(&self, inputs: Option) -> Result where I: IntoIterator, IV: TryInto, IE: Into, { let inputs = if let Some(inputs) = inputs { let inputs = inputs .into_iter() .map(|i| i.try_into().map_err(|e| e.into())) .collect::>>()?; anyhow::ensure!(self.input_count()? == inputs.len()); Some(inputs) } else { None }; let mut iptrs: Option> = inputs.as_ref().map(|is| is.iter().map(|v| v.0).collect()); let mut json: *mut i8 = null_mut(); let values = iptrs.as_mut().map(|it| it.as_mut_ptr()).unwrap_or(null_mut()); check!(sys::tract_runnable_profile_json(self.0, values, &mut json))?; anyhow::ensure!(!json.is_null()); unsafe { let s = CStr::from_ptr(json).to_owned(); sys::tract_free_cstring(json); Ok(s.to_str()?.to_owned()) } } } // STATE wrapper!(State, TractState, tract_state_destroy); impl StateInterface for State { type Tensor = Tensor; type Fact = Fact; fn run(&mut self, inputs: impl IntoInputs) -> Result> { let inputs = inputs.into_inputs()?; let mut outputs = vec![null_mut(); self.output_count()?]; let mut inputs: Vec<_> = inputs.iter().map(|v| v.0).collect(); check!(sys::tract_state_run(self.0, inputs.as_mut_ptr(), outputs.as_mut_ptr()))?; let outputs = outputs.into_iter().map(Tensor).collect(); Ok(outputs) } fn input_count(&self) -> Result { let mut count = 0; check!(sys::tract_state_input_count(self.0, &mut count))?; Ok(count) } fn output_count(&self) -> Result { let mut count = 0; check!(sys::tract_state_output_count(self.0, &mut count))?; Ok(count) } } // TENSOR wrapper!(Tensor, TractTensor, tract_tensor_destroy); unsafe impl Send for Tensor {} unsafe impl Sync for Tensor {} impl TensorInterface for Tensor { fn from_bytes(dt: DatumType, shape: &[usize], data: &[u8]) -> Result { anyhow::ensure!(data.len() == shape.iter().product::() * dt.size_of()); let mut value = null_mut(); check!(sys::tract_tensor_from_bytes( dt as _, shape.len(), shape.as_ptr(), data.as_ptr() as _, &mut 
value ))?; Ok(Tensor(value)) } fn as_bytes(&self) -> Result<(DatumType, &[usize], &[u8])> { let mut rank = 0; let mut dt = sys::DatumType_TRACT_DATUM_TYPE_BOOL as _; let mut shape = null(); let mut data = null(); check!(sys::tract_tensor_as_bytes(self.0, &mut dt, &mut rank, &mut shape, &mut data))?; unsafe { let dt: DatumType = std::mem::transmute(dt); let shape = std::slice::from_raw_parts(shape, rank); let len: usize = shape.iter().product(); let data = std::slice::from_raw_parts(data as *const u8, len * dt.size_of()); Ok((dt, shape, data)) } } fn datum_type(&self) -> Result { let mut dt = sys::DatumType_TRACT_DATUM_TYPE_BOOL as _; check!(sys::tract_tensor_as_bytes( self.0, &mut dt, std::ptr::null_mut(), std::ptr::null_mut(), std::ptr::null_mut() ))?; unsafe { let dt: DatumType = std::mem::transmute(dt); Ok(dt) } } fn convert_to(&self, to: DatumType) -> Result { let mut new = null_mut(); check!(sys::tract_tensor_convert_to(self.0, to as _, &mut new))?; Ok(Tensor(new)) } } impl PartialEq for Tensor { fn eq(&self, other: &Self) -> bool { let Ok((me_dt, me_shape, me_data)) = self.as_bytes() else { return false }; let Ok((other_dt, other_shape, other_data)) = other.as_bytes() else { return false }; me_dt == other_dt && me_shape == other_shape && me_data == other_data } } tensor_from_to_ndarray!(); // FACT wrapper!(Fact, TractFact, tract_fact_destroy); impl Fact { fn new(model: &Model, spec: impl ToString) -> Result { let cstr = CString::new(spec.to_string())?; let mut fact = null_mut(); check!(sys::tract_model_parse_fact(model.0, cstr.as_ptr(), &mut fact))?; Ok(Fact(fact)) } fn dump(&self) -> Result { let mut ptr = null_mut(); check!(sys::tract_fact_dump(self.0, &mut ptr))?; unsafe { let s = CStr::from_ptr(ptr).to_owned(); sys::tract_free_cstring(ptr); Ok(s.to_str()?.to_owned()) } } } impl FactInterface for Fact { type Dim = Dim; fn datum_type(&self) -> Result { let mut dt = 0u32; check!(sys::tract_fact_datum_type(self.0, &mut dt as *const u32 as _))?; Ok(unsafe { 
std::mem::transmute::(dt) }) } fn rank(&self) -> Result { let mut rank = 0; check!(sys::tract_fact_rank(self.0, &mut rank))?; Ok(rank) } fn dim(&self, axis: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_fact_dim(self.0, axis, &mut ptr))?; Ok(Dim(ptr)) } } impl std::fmt::Display for Fact { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.dump() { Ok(s) => f.write_str(&s), Err(_) => Err(std::fmt::Error), } } } // INFERENCE FACT wrapper!(InferenceFact, TractInferenceFact, tract_inference_fact_destroy); impl InferenceFact { fn new(model: &InferenceModel, spec: impl ToString) -> Result { let cstr = CString::new(spec.to_string())?; let mut fact = null_mut(); check!(sys::tract_inference_fact_parse(model.0, cstr.as_ptr(), &mut fact))?; Ok(InferenceFact(fact)) } fn dump(&self) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_fact_dump(self.0, &mut ptr))?; unsafe { let s = CStr::from_ptr(ptr).to_owned(); sys::tract_free_cstring(ptr); Ok(s.to_str()?.to_owned()) } } } impl InferenceFactInterface for InferenceFact { fn empty() -> Result { let mut fact = null_mut(); check!(sys::tract_inference_fact_empty(&mut fact))?; Ok(InferenceFact(fact)) } } impl Default for InferenceFact { fn default() -> Self { Self::empty().unwrap() } } impl std::fmt::Display for InferenceFact { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.dump() { Ok(s) => f.write_str(&s), Err(_) => Err(std::fmt::Error), } } } as_inference_fact_impl!(InferenceModel, InferenceFact); as_fact_impl!(Model, Fact); // Dim wrapper!(Dim, TractDim, tract_dim_destroy); impl Dim { fn dump(&self) -> Result { let mut ptr = null_mut(); check!(sys::tract_dim_dump(self.0, &mut ptr))?; unsafe { let s = CStr::from_ptr(ptr).to_owned(); sys::tract_free_cstring(ptr); Ok(s.to_str()?.to_owned()) } } } impl DimInterface for Dim { fn eval(&self, values: impl IntoIterator, i64)>) -> Result { let (names, values): (Vec<_>, Vec<_>) = 
values.into_iter().unzip(); let c_strings: Vec = names.into_iter().map(|a| Ok(CString::new(a.as_ref())?)).collect::>()?; let ptrs: Vec<_> = c_strings.iter().map(|cs| cs.as_ptr()).collect(); let mut ptr = null_mut(); check!(sys::tract_dim_eval(self.0, ptrs.len(), ptrs.as_ptr(), values.as_ptr(), &mut ptr))?; Ok(Dim(ptr)) } fn to_int64(&self) -> Result { let mut i = 0; check!(sys::tract_dim_to_int64(self.0, &mut i))?; Ok(i) } } impl std::fmt::Display for Dim { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.dump() { Ok(s) => f.write_str(&s), Err(_) => Err(std::fmt::Error), } } } ================================================ FILE: api/proxy/sys/Cargo.toml ================================================ [package] name = "tract-proxy-sys" version = "0.23.0-pre" license = "MIT OR Apache-2.0" authors = ["Mathieu Poumeyrol "] description = "Tiny, no-nonsense, self contained, TensorFlow and ONNX inference" repository = "https://github.com/sonos/tract" keywords = [ "NeuralNetworks" ] categories = [ "science" ] autobenches = false edition = "2024" rust-version.workspace = true include = [ "Cargo.toml", "src/**/*.rs", "LICENSE*", "build.rs", "tract.h" ] [build-dependencies] bindgen = "0.72.1" ================================================ FILE: api/proxy/sys/build.rs ================================================ use std::env; use std::path::PathBuf; fn main() { println!("cargo:rerun-if-env-changed=TRACT_DYLIB_SEARCH_PATH"); println!("cargo:rerun-if-changed=tract.h"); if let Ok(path) = std::env::var("TRACT_DYLIB_SEARCH_PATH") { println!("cargo:rustc-link-search={path}"); } println!("cargo:rustc-link-lib=tract"); let bindings = bindgen::Builder::default() .header("tract.h") .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) .generate() .expect("Unable to generate bindings"); let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); bindings.write_to_file(out_path.join("bindings.rs")).expect("Couldn't write bindings!"); } 
================================================ FILE: api/proxy/sys/src/lib.rs ================================================ #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] #![allow(non_snake_case)] #![allow(improper_ctypes)] #![allow(deref_nullptr)] #![allow(unsafe_op_in_unsafe_fn)] #![allow(clippy::redundant_static_lifetimes)] #![allow(clippy::useless_transmute)] include!(concat!(env!("OUT_DIR"), "/bindings.rs")); ================================================ FILE: api/proxy/sys/tract.h ================================================ #include #include #include #include typedef enum DatumType { TRACT_DATUM_TYPE_BOOL = 1, TRACT_DATUM_TYPE_U8 = 17, TRACT_DATUM_TYPE_U16 = 18, TRACT_DATUM_TYPE_U32 = 20, TRACT_DATUM_TYPE_U64 = 24, TRACT_DATUM_TYPE_I8 = 33, TRACT_DATUM_TYPE_I16 = 34, TRACT_DATUM_TYPE_I32 = 36, TRACT_DATUM_TYPE_I64 = 40, TRACT_DATUM_TYPE_F16 = 50, TRACT_DATUM_TYPE_F32 = 52, TRACT_DATUM_TYPE_F64 = 56, TRACT_DATUM_TYPE_COMPLEX_I16 = 66, TRACT_DATUM_TYPE_COMPLEX_I32 = 68, TRACT_DATUM_TYPE_COMPLEX_I64 = 72, TRACT_DATUM_TYPE_COMPLEX_F16 = 82, TRACT_DATUM_TYPE_COMPLEX_F32 = 84, TRACT_DATUM_TYPE_COMPLEX_F64 = 88, } DatumType; /** * Used as a return type of functions that can encounter errors. 
* If the function encountered an error, you can retrieve it using the `tract_get_last_error` * function */ typedef enum TRACT_RESULT { /** * The function returned successfully */ TRACT_RESULT_OK = 0, /** * The function returned an error */ TRACT_RESULT_KO = 1, } TRACT_RESULT; /** * Dim */ typedef struct TractDim TractDim; typedef struct TractFact TractFact; typedef struct TractInferenceFact TractInferenceFact; typedef struct TractInferenceModel TractInferenceModel; typedef struct TractModel TractModel; typedef struct TractNnef TractNnef; typedef struct TractOnnx TractOnnx; typedef struct TractRunnable TractRunnable; typedef struct TractRuntime TractRuntime; typedef struct TractState TractState; typedef struct TractTensor TractTensor; /** * Retrieve the last error that happened in this thread. A function encountered an error if * its return type is of type `TRACT_RESULT` and it returned `TRACT_RESULT_KO`. * * # Return value * It returns a pointer to a null-terminated UTF-8 string that will contain the error description. * Rust side keeps ownership of the buffer. It will be valid as long as no other tract call is * performed by the thread. * If no error occurred, null is returned. */ const char *tract_get_last_error(void); /** * Returns a pointer to a static buffer containing a null-terminated version string. * * The returned pointer must not be freed. */ const char *tract_version(void); /** * Frees a string allocated by libtract. */ void tract_free_cstring(char *ptr); /** * Creates an instance of an NNEF framework and parser that can be used to load and dump NNEF models. * * The returned object should be destroyed with `tract_nnef_destroy` once the model * has been loaded.
*/ enum TRACT_RESULT tract_nnef_create(struct TractNnef **nnef); enum TRACT_RESULT tract_nnef_enable_tract_core(struct TractNnef *nnef); enum TRACT_RESULT tract_nnef_enable_tract_extra(struct TractNnef *nnef); enum TRACT_RESULT tract_nnef_enable_tract_transformers(struct TractNnef *nnef); enum TRACT_RESULT tract_nnef_enable_onnx(struct TractNnef *nnef); enum TRACT_RESULT tract_nnef_enable_pulse(struct TractNnef *nnef); enum TRACT_RESULT tract_nnef_enable_extended_identifier_syntax(struct TractNnef *nnef); /** * Destroy the NNEF parser. It is safe to destroy the NNEF parser once the model has been loaded. */ enum TRACT_RESULT tract_nnef_destroy(struct TractNnef **nnef); /** * Parse and load an NNEF model as a tract TypedModel. * * `path` is a null-terminated utf-8 string pointer. It can be an archive (tar or tar.gz file) or a * directory. */ enum TRACT_RESULT tract_nnef_load(const struct TractNnef *nnef, const char *path, struct TractModel **model); /** * Parse and load an NNEF buffer as a tract TypedModel. * * `data` is a buffer pointer * `len` is the buffer len */ enum TRACT_RESULT tract_nnef_load_buffer(const struct TractNnef *nnef, const void *data, uintptr_t len, struct TractModel **model); /** * Dump a TypedModel as a NNEF tar file. * * `path` is a null-terminated utf-8 string pointer to the `.tar` file to be created. * * This function creates a plain, non-compressed, archive. */ enum TRACT_RESULT tract_nnef_write_model_to_tar(const struct TractNnef *nnef, const char *path, const struct TractModel *model); /** * Dump a TypedModel as a NNEF .tar.gz file. * * `path` is a null-terminated utf-8 string pointer to the `.tar.gz` file to be created. */ enum TRACT_RESULT tract_nnef_write_model_to_tar_gz(const struct TractNnef *nnef, const char *path, const struct TractModel *model); /** * Dump a TypedModel as a NNEF directory. * * `path` is a null-terminated utf-8 string pointer to the directory to be created. * * This function creates a plain, non-compressed, archive.
*/ enum TRACT_RESULT tract_nnef_write_model_to_dir(const struct TractNnef *nnef, const char *path, const struct TractModel *model); /** * Creates an instance of an ONNX framework and parser that can be used to load models. * * The returned object should be destroyed with `tract_onnx_destroy` once the model * has been loaded. */ enum TRACT_RESULT tract_onnx_create(struct TractOnnx **onnx); /** * Destroy the ONNX parser. It is safe to destroy the ONNX parser once the model has been loaded. */ enum TRACT_RESULT tract_onnx_destroy(struct TractOnnx **onnx); /** * Parse and load an ONNX model as a tract InferenceModel. * * `path` is a null-terminated utf-8 string pointer. It must point to a `.onnx` model file. */ enum TRACT_RESULT tract_onnx_load(const struct TractOnnx *onnx, const char *path, struct TractInferenceModel **model); /** * Parse and load an ONNX buffer as a tract InferenceModel. * * `data` is a buffer pointer * `len` is the buffer len */ enum TRACT_RESULT tract_onnx_load_buffer(const struct TractOnnx *onnx, const void *data, uintptr_t len, struct TractInferenceModel **model); /** * Query an InferenceModel input count. */ enum TRACT_RESULT tract_inference_model_input_count(const struct TractInferenceModel *model, uintptr_t *inputs); /** * Query an InferenceModel output count. */ enum TRACT_RESULT tract_inference_model_output_count(const struct TractInferenceModel *model, uintptr_t *outputs); /** * Query the name of a model input. * * The returned name must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_inference_model_input_name(const struct TractInferenceModel *model, uintptr_t input, char **name); /** * Query the name of a model output. * * The returned name must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_inference_model_output_name(const struct TractInferenceModel *model, uintptr_t output, int8_t **name); /** * Query a model input fact.
*/ enum TRACT_RESULT tract_inference_model_input_fact(const struct TractInferenceModel *model, uintptr_t input_id, struct TractInferenceFact **fact); /** * Set an input fact of an InferenceModel. * * The `fact` argument is only borrowed by this function, it still must be destroyed. * `fact` can be set to NULL to erase the current input fact of the model. */ enum TRACT_RESULT tract_inference_model_set_input_fact(struct TractInferenceModel *model, uintptr_t input_id, const struct TractInferenceFact *fact); /** * Query an output fact for an InferenceModel. * * The returned fact must be freed using `tract_inference_fact_destroy`. */ enum TRACT_RESULT tract_inference_model_output_fact(const struct TractInferenceModel *model, uintptr_t output_id, struct TractInferenceFact **fact); /** * Set an output fact of an InferenceModel. * * The `fact` argument is only borrowed by this function, it still must be destroyed. * `fact` can be set to NULL to erase the current output fact of the model. */ enum TRACT_RESULT tract_inference_model_set_output_fact(struct TractInferenceModel *model, uintptr_t output_id, const struct TractInferenceFact *fact); /** * Analyse an InferenceModel in-place. */ enum TRACT_RESULT tract_inference_model_analyse(struct TractInferenceModel *model); /** * Transform a fully analysed InferenceModel to a TypedModel. * * This function takes ownership of the InferenceModel `model` whether it succeeds * or not. `tract_inference_model_destroy` must not be used on `model`. * * On the other hand, caller will be owning the newly created typed model. */ enum TRACT_RESULT tract_inference_model_into_model(struct TractInferenceModel **model, struct TractModel **typed); /** * Destroy an InferenceModel. */ enum TRACT_RESULT tract_inference_model_destroy(struct TractInferenceModel **model); /** * Query a TypedModel input count.
*/ enum TRACT_RESULT tract_model_input_count(const struct TractModel *model, uintptr_t *inputs); /** * Query a TypedModel output count. */ enum TRACT_RESULT tract_model_output_count(const struct TractModel *model, uintptr_t *outputs); /** * Query the name of a model input. * * The returned name must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_model_input_name(const struct TractModel *model, uintptr_t input, char **name); /** * Query the input fact of a model. * * The returned fact must be freed with tract_fact_destroy. */ enum TRACT_RESULT tract_model_input_fact(const struct TractModel *model, uintptr_t input_id, struct TractFact **fact); /** * Query the name of a model output. * * The returned name must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_model_output_name(const struct TractModel *model, uintptr_t output, char **name); /** * Query the output fact of a model. * * The returned fact must be freed with tract_fact_destroy. */ enum TRACT_RESULT tract_model_output_fact(const struct TractModel *model, uintptr_t input_id, struct TractFact **fact); /** * Apply a transform to the model. */ enum TRACT_RESULT tract_model_transform(struct TractModel *model, const int8_t *transform); /** * Perform a profile of the model using the provided inputs. */ enum TRACT_RESULT tract_runnable_profile_json(struct TractRunnable *model, struct TractTensor **inputs, int8_t **json); /** * Convert a TypedModel into a TypedRunnableModel. * * This function transfers ownership of the `model` argument to the newly-created `runnable` model. * * Runnables are reference counted. When done, it should be released with `tract_runnable_release`. */ enum TRACT_RESULT tract_model_into_runnable(struct TractModel **model, struct TractRunnable **runnable); /** * Query the number of properties in a model.
*/ enum TRACT_RESULT tract_model_property_count(const struct TractModel *model, uintptr_t *count); /** * Query the properties names of a model. * * The "names" array should be big enough to fit `tract_model_property_count` string pointers. * * Each name will have to be freed using `tract_free_cstring`. */ enum TRACT_RESULT tract_model_property_names(const struct TractModel *model, int8_t **names); /** * Query a property tensor in a model. */ enum TRACT_RESULT tract_model_property(const struct TractModel *model, const int8_t *name, struct TractTensor **tensor); /** * Parse a fact specification string into a Fact. * * The returned fact must be freed with `tract_fact_destroy`. */ enum TRACT_RESULT tract_model_parse_fact(struct TractModel *model, const char *spec, struct TractFact **fact); /** * Destroy a TypedModel. */ enum TRACT_RESULT tract_model_destroy(struct TractModel **model); /** * Creates an instance of a tract Runtime that can be used to run a model on a specific * hardware / software stack (like a GPU). * * The returned object should be released with `tract_runtime_release`. */ enum TRACT_RESULT tract_runtime_for_name(const char *name, struct TractRuntime **nnef); /** * Query the name of a Runtime. * * The returned name must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_runtime_name(const struct TractRuntime *runtime, char **name); /** * Convert a Model into a Runnable for this Runtime. * * This function transfers ownership of the `model` argument to the newly-created `runnable` model. * * Runnables are reference counted. When done, it should be released with `tract_runnable_release`. */ enum TRACT_RESULT tract_runtime_prepare(const struct TractRuntime *runtime, struct TractModel **model, struct TractRunnable **runnable); enum TRACT_RESULT tract_runtime_release(struct TractRuntime **runtime); /** * Spawn a session state from a runnable model.
* * This function does not take ownership of the `runnable` object, it can be used again to spawn * other state instances. The runnable object is internally reference counted, it will be * kept alive as long as any associated `State` exists (or as long as the `runnable` is not * explicitly released with `tract_runnable_release`). * * `state` is a newly-created object. It should ultimately be destroyed with `tract_state_destroy`. */ enum TRACT_RESULT tract_runnable_spawn_state(struct TractRunnable *runnable, struct TractState **state); /** * Convenience function to run a stateless model. * * `inputs` is a pointer to a pre-existing array of input TractTensor. Its length *must* be equal * to the number of inputs of the models. The function does not take ownership of the input * tensors. * `outputs` is a pointer to a pre-existing array of TractTensor pointers that will be overwritten * with pointers to output tensors. These tensors are under the responsibility of the caller, it * will have to release them with `tract_tensor_destroy`. */ enum TRACT_RESULT tract_runnable_run(struct TractRunnable *runnable, struct TractTensor **inputs, struct TractTensor **outputs); /** * Query a Runnable input count. */ enum TRACT_RESULT tract_runnable_input_count(const struct TractRunnable *model, uintptr_t *inputs); /** * Query a Runnable output count. */ enum TRACT_RESULT tract_runnable_output_count(const struct TractRunnable *model, uintptr_t *outputs); /** * Query the input fact of a runnable model. * * The returned fact must be freed with tract_fact_destroy. */ enum TRACT_RESULT tract_runnable_input_fact(const struct TractRunnable *runnable, uintptr_t input_id, struct TractFact **fact); /** * Query the output fact of a runnable model. * * The returned fact must be freed with tract_fact_destroy.
*/ enum TRACT_RESULT tract_runnable_output_fact(const struct TractRunnable *runnable, uintptr_t output_id, struct TractFact **fact); /** * Query the number of properties in a runnable model. */ enum TRACT_RESULT tract_runnable_property_count(const struct TractRunnable *model, uintptr_t *count); /** * Query the properties names of a runnable model. * * The "names" array should be big enough to fit `tract_runnable_property_count` string pointers. * * Each name will have to be freed using `tract_free_cstring`. */ enum TRACT_RESULT tract_runnable_property_names(const struct TractRunnable *model, int8_t **names); /** * Query a property tensor in a runnable model. */ enum TRACT_RESULT tract_runnable_property(const struct TractRunnable *model, const int8_t *name, struct TractTensor **tensor); enum TRACT_RESULT tract_runnable_release(struct TractRunnable **runnable); /** * Create a TractTensor from caller data and metadata. * * This call copies the data into tract space. All the pointers only need to be alive for the * duration of the call. * * rank is the number of dimensions of the tensor (i.e. the length of the shape vector). * * The returned tensor must be destroyed by `tract_tensor_destroy`. */ enum TRACT_RESULT tract_tensor_from_bytes(DatumType datum_type, uintptr_t rank, const uintptr_t *shape, void *data, struct TractTensor **tensor); /** * Write a tensor as a debug string * * The returned string must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_tensor_dump(const struct TractTensor *tensor, char **spec); /** * Convert a tensor to a new datum type. * * This function will perform a cheap shallow clone if the destination type is * the same as the current type, otherwise it returns a newly allocated tensor instead. * * In both cases, the returned tensor must be destroyed by `tract_tensor_destroy`. * The input tensor is not consumed, it still needs to be destroyed.
*/ enum TRACT_RESULT tract_tensor_convert_to(const struct TractTensor *input, DatumType datum_type, struct TractTensor **output); /** * Destroy a tensor. */ enum TRACT_RESULT tract_tensor_destroy(struct TractTensor **tensor); /** * Inspect part of a tensor. Except `tensor`, all argument pointers can be null if only some specific bits * are required. */ enum TRACT_RESULT tract_tensor_as_bytes(struct TractTensor *tensor, DatumType *datum_type, uintptr_t *rank, const uintptr_t **shape, const void **data); /** * Run a turn on a model state * * `inputs` is a pointer to a pre-existing array of input TractTensor. Its length *must* be equal * to the number of inputs of the models. The function does not take ownership of the input * tensors. * `outputs` is a pointer to a pre-existing array of TractTensor pointers that will be overwritten * with pointers to output tensors. These tensors are under the responsibility of the caller, it * will have to release them with `tract_tensor_destroy`. */ enum TRACT_RESULT tract_state_run(struct TractState *state, struct TractTensor **inputs, struct TractTensor **outputs); /** * Query a State input count. */ enum TRACT_RESULT tract_state_input_count(const struct TractState *state, uintptr_t *inputs); /** * Query a State output count. */ enum TRACT_RESULT tract_state_output_count(const struct TractState *state, uintptr_t *outputs); enum TRACT_RESULT tract_state_destroy(struct TractState **state); /** * Gets the rank (aka number of axes/dimensions) of a fact. */ enum TRACT_RESULT tract_fact_rank(const struct TractFact *fact, uintptr_t *rank); /** * Extract the datum type of the fact. */ enum TRACT_RESULT tract_fact_datum_type(const struct TractFact *fact, DatumType *datum_type); /** * Extract the dimension from one dimension of the fact. */ enum TRACT_RESULT tract_fact_dim(const struct TractFact *fact, uintptr_t axis, struct TractDim **dim); /** * Write a fact as its specification string.
* * The returned string must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_fact_dump(const struct TractFact *fact, char **spec); enum TRACT_RESULT tract_fact_destroy(struct TractFact **fact); /** * Parse a fact specification string into an InferenceFact. * * The returned fact must be freed with `tract_inference_fact_destroy`. */ enum TRACT_RESULT tract_inference_fact_parse(struct TractInferenceModel *model, const char *spec, struct TractInferenceFact **fact); /** * Creates an empty inference fact. * * The returned fact must be freed by the caller using tract_inference_fact_destroy */ enum TRACT_RESULT tract_inference_fact_empty(struct TractInferenceFact **fact); /** * Write an inference fact as its specification string. * * The returned string must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_inference_fact_dump(const struct TractInferenceFact *fact, char **spec); /** * Destroy a fact. */ enum TRACT_RESULT tract_inference_fact_destroy(struct TractInferenceFact **fact); /** * Substitute symbols by the provided values in the Dim, generating a new one. */ enum TRACT_RESULT tract_dim_eval(const struct TractDim *dim, uintptr_t nb_symbols, const int8_t *const *symbols, const int64_t *values, struct TractDim **result); /** * Try converting a Dim into an actual integer * * Will fail if the Dim contains symbols. */ enum TRACT_RESULT tract_dim_to_int64(const struct TractDim *fact, int64_t *i); /** * Write a dim as its specification string. * * The returned string must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_dim_dump(const struct TractDim *dim, char **spec); /** * Destroy a dim.
*/ enum TRACT_RESULT tract_dim_destroy(struct TractDim **dim); ================================================ FILE: api/proxy/tests/mobilenet.rs ================================================ use tract_api::*; use tract_proxy::*; include!("../../tests/mobilenet/mod.rs"); ================================================ FILE: api/py/.gitignore ================================================ __pycache__ *.so *.egg-info *.onnx build mobilenet_v2_1.0.onnx.nnef.tgz rust-workspace dist ================================================ FILE: api/py/MANIFEST.in ================================================ graft rust-workspace graft docs ================================================ FILE: api/py/_static/redirect-index.html ================================================

Redirecting to documentation...

================================================ FILE: api/py/_static/version-switcher.js ================================================ // Version switcher for multi-version gh-pages docs. // Reads versions.json (mike-compatible format) from the site root and injects // a